The first working implementation of cuda kernel for draw_bounding_boxes op helper.
parent
ae09cfee32
commit
30a8af566c
|
@ -28,11 +28,12 @@ namespace helpers {
|
||||||
static __global__ void drawBoundingBoxesKernel(T const* images, Nd4jLong* imagesShape, T const* boxes,
|
static __global__ void drawBoundingBoxesKernel(T const* images, Nd4jLong* imagesShape, T const* boxes,
|
||||||
Nd4jLong* boxesShape, T const* colors, Nd4jLong* colorsShape, T* output, Nd4jLong* outputShape,
|
Nd4jLong* boxesShape, T const* colors, Nd4jLong* colorsShape, T* output, Nd4jLong* outputShape,
|
||||||
Nd4jLong batchSize, Nd4jLong width, Nd4jLong height, Nd4jLong channels, Nd4jLong colorSetSize) {
|
Nd4jLong batchSize, Nd4jLong width, Nd4jLong height, Nd4jLong channels, Nd4jLong colorSetSize) {
|
||||||
|
|
||||||
for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { // loop by batch
|
for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { // loop by batch
|
||||||
for (auto c = 0; c < colorSetSize; c += blockDim.x) {
|
for (auto c = threadIdx.x; c < colorSetSize; c += blockDim.x) {
|
||||||
// box with shape
|
// box with shape
|
||||||
auto pos = 0;
|
auto pos = channels * c;
|
||||||
auto internalBox = &boxes[pos];//(*boxes)(b, {0})(c, {0});//internalBoxes->at(c);
|
auto internalBox = &boxes[b * colorSetSize * 4 + c * 4];//(*boxes)(b, {0})(c, {0});//internalBoxes->at(c);
|
||||||
auto color = &colors[pos];//colorSet->at(c);
|
auto color = &colors[pos];//colorSet->at(c);
|
||||||
auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox[0]));
|
auto rowStart = nd4j::math::nd4j_max(Nd4jLong (0), Nd4jLong ((height - 1) * internalBox[0]));
|
||||||
auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox[2]));
|
auto rowEnd = nd4j::math::nd4j_min(Nd4jLong (height - 1), Nd4jLong ((height - 1) * internalBox[2]));
|
||||||
|
@ -42,8 +43,8 @@ namespace helpers {
|
||||||
for (auto e = 0; e < channels; ++e) {
|
for (auto e = 0; e < channels; ++e) {
|
||||||
Nd4jLong yMinPos[] = {b, y, colStart, e};
|
Nd4jLong yMinPos[] = {b, y, colStart, e};
|
||||||
Nd4jLong yMaxPos[] = {b, y, colEnd, e};
|
Nd4jLong yMaxPos[] = {b, y, colEnd, e};
|
||||||
auto zIndexYmin = shape::getOffset(outputShape, yMinPos, 4);
|
auto zIndexYmin = shape::getOffset(outputShape, yMinPos, 0);
|
||||||
auto zIndexYmax = shape::getOffset(outputShape, yMaxPos, 4);
|
auto zIndexYmax = shape::getOffset(outputShape, yMaxPos, 0);
|
||||||
output[zIndexYmin] = color[e];
|
output[zIndexYmin] = color[e];
|
||||||
output[zIndexYmax] = color[e];
|
output[zIndexYmax] = color[e];
|
||||||
}
|
}
|
||||||
|
@ -52,8 +53,8 @@ namespace helpers {
|
||||||
for (auto e = 0; e < channels; ++e) {
|
for (auto e = 0; e < channels; ++e) {
|
||||||
Nd4jLong xMinPos[] = {b, rowStart, x, e};
|
Nd4jLong xMinPos[] = {b, rowStart, x, e};
|
||||||
Nd4jLong xMaxPos[] = {b, rowEnd, x, e};
|
Nd4jLong xMaxPos[] = {b, rowEnd, x, e};
|
||||||
auto zIndexXmin = shape::getOffset(outputShape, xMinPos, 4);
|
auto zIndexXmin = shape::getOffset(outputShape, xMinPos, 0);
|
||||||
auto zIndexXmax = shape::getOffset(outputShape, xMaxPos, 4);
|
auto zIndexXmax = shape::getOffset(outputShape, xMaxPos, 0);
|
||||||
output[zIndexXmin] = color[e];
|
output[zIndexXmin] = color[e];
|
||||||
output[zIndexXmax] = color[e];
|
output[zIndexXmax] = color[e];
|
||||||
}
|
}
|
||||||
|
@ -62,6 +63,7 @@ namespace helpers {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void drawBoundingBoxesH(nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output) {
|
void drawBoundingBoxesH(nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output) {
|
||||||
auto batchSize = images->sizeAt(0);
|
auto batchSize = images->sizeAt(0);
|
||||||
|
@ -70,14 +72,12 @@ namespace helpers {
|
||||||
auto channels = images->sizeAt(3);
|
auto channels = images->sizeAt(3);
|
||||||
auto stream = context->getCudaStream();
|
auto stream = context->getCudaStream();
|
||||||
auto colorSetSize = colors->sizeAt(0);
|
auto colorSetSize = colors->sizeAt(0);
|
||||||
// auto imageList = images->allTensorsAlongDimension({1, 2, 3}); // split images by batch
|
|
||||||
// auto boxList = boxes->allTensorsAlongDimension({1, 2}); // split boxes by batch
|
|
||||||
// auto colorSet = colors->allTensorsAlongDimension({1});
|
|
||||||
auto imagesBuf = images->getDataBuffer()->specialAsT<T>();
|
auto imagesBuf = images->getDataBuffer()->specialAsT<T>();
|
||||||
auto boxesBuf = boxes->getDataBuffer()->specialAsT<T>();
|
auto boxesBuf = boxes->getDataBuffer()->specialAsT<T>();
|
||||||
auto colorsBuf = colors->getDataBuffer()->specialAsT<T>();
|
auto colorsBuf = colors->getDataBuffer()->specialAsT<T>();
|
||||||
auto outputBuf = output->dataBuffer()->specialAsT<T>();
|
auto outputBuf = output->dataBuffer()->specialAsT<T>();
|
||||||
drawBoundingBoxesKernel<<<128, 256, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(),
|
drawBoundingBoxesKernel<<<1, 1, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(),
|
||||||
boxesBuf, boxes->getSpecialShapeInfo(), colorsBuf, colors->getSpecialShapeInfo(),
|
boxesBuf, boxes->getSpecialShapeInfo(), colorsBuf, colors->getSpecialShapeInfo(),
|
||||||
outputBuf, output->specialShapeInfo(), batchSize, width, height, channels, colorSetSize);
|
outputBuf, output->specialShapeInfo(), batchSize, width, height, channels, colorSetSize);
|
||||||
}
|
}
|
||||||
|
@ -90,9 +90,10 @@ namespace helpers {
|
||||||
// height = images->sizeAt(1), width = images->sizeAt(2)
|
// height = images->sizeAt(1), width = images->sizeAt(2)
|
||||||
// colors - colors for each box given
|
// colors - colors for each box given
|
||||||
// set up color for each box as frame
|
// set up color for each box as frame
|
||||||
|
NDArray::prepareSpecialUse({output}, {images, boxes, colors});
|
||||||
output->assign(images);
|
output->assign(images);
|
||||||
BUILD_SINGLE_SELECTOR(output->dataType(), drawBoundingBoxesH, (context, images, boxes, colors, output), FLOAT_TYPES);
|
BUILD_SINGLE_SELECTOR(output->dataType(), drawBoundingBoxesH, (context, images, boxes, colors, output), FLOAT_TYPES);
|
||||||
|
NDArray::registerSpecialUse({output}, {images, boxes, colors});
|
||||||
}
|
}
|
||||||
BUILD_SINGLE_TEMPLATE(template void drawBoundingBoxesH, (nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output), FLOAT_TYPES);
|
BUILD_SINGLE_TEMPLATE(template void drawBoundingBoxesH, (nd4j::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output), FLOAT_TYPES);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2104,7 +2104,9 @@ TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_2) {
|
||||||
ASSERT_EQ(ND4J_STATUS_OK, results->status());
|
ASSERT_EQ(ND4J_STATUS_OK, results->status());
|
||||||
|
|
||||||
auto result = results->at(0);
|
auto result = results->at(0);
|
||||||
result->printIndexedBuffer("Bounded boxes 2");
|
// result->syncToHost();
|
||||||
|
// result->printBuffer("Bounded boxes 2");
|
||||||
|
// expected.printBuffer("Bounded expec 2");
|
||||||
ASSERT_TRUE(expected.isSameShapeStrict(result));
|
ASSERT_TRUE(expected.isSameShapeStrict(result));
|
||||||
ASSERT_TRUE(expected.equalsTo(result));
|
ASSERT_TRUE(expected.equalsTo(result));
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue