DL4J/DataVec: Fix Yolo2OutputLayer and ObjectDetectionRecordReader support for NHWC data format (#483)
* Fix Yolo2OutputLayer for NHWC data format Signed-off-by: Alex Black <blacka101@gmail.com> * ObjectDetectionRecordReader NHWC support Signed-off-by: Alex Black <blacka101@gmail.com>master
parent
45ebd4899c
commit
ee3e059b12
|
@ -49,7 +49,7 @@ import static org.nd4j.linalg.indexing.NDArrayIndex.point;
|
||||||
/**
|
/**
|
||||||
* An image record reader for object detection.
|
* An image record reader for object detection.
|
||||||
* <p>
|
* <p>
|
||||||
* Format of returned values: 4d array, with dimensions [minibatch, 4+C, h, w]
|
* Format of returned values: 4d array, with dimensions [minibatch, 4+C, h, w] (nchw) or [minibatch, h, w, 4+C] (nhwc)
|
||||||
* Where the image is quantized into h x w grid locations.
|
* Where the image is quantized into h x w grid locations.
|
||||||
* <p>
|
* <p>
|
||||||
* Note that this matches the format required for Deeplearning4j's Yolo2OutputLayer
|
* Note that this matches the format required for Deeplearning4j's Yolo2OutputLayer
|
||||||
|
@ -61,42 +61,67 @@ public class ObjectDetectionRecordReader extends BaseImageRecordReader {
|
||||||
private final int gridW;
|
private final int gridW;
|
||||||
private final int gridH;
|
private final int gridH;
|
||||||
private final ImageObjectLabelProvider labelProvider;
|
private final ImageObjectLabelProvider labelProvider;
|
||||||
|
private final boolean nchw;
|
||||||
|
|
||||||
protected Image currentImage;
|
protected Image currentImage;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* As per {@link #ObjectDetectionRecordReader(int, int, int, int, int, boolean, ImageObjectLabelProvider)} but hardcoded
|
||||||
|
* to NCHW format
|
||||||
|
*/
|
||||||
|
public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, ImageObjectLabelProvider labelProvider) {
|
||||||
|
this(height, width, channels, gridH, gridW, true, labelProvider);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create ObjectDetectionRecordReader with
|
||||||
*
|
*
|
||||||
* @param height Height of the output images
|
* @param height Height of the output images
|
||||||
* @param width Width of the output images
|
* @param width Width of the output images
|
||||||
* @param channels Number of channels for the output images
|
* @param channels Number of channels for the output images
|
||||||
* @param gridH Grid/quantization size (along height dimension) - Y axis
|
* @param gridH Grid/quantization size (along height dimension) - Y axis
|
||||||
* @param gridW Grid/quantization size (along height dimension) - X axis
|
* @param gridW Grid/quantization size (along height dimension) - X axis
|
||||||
|
* @param nchw If true: return NCHW format labels with array shape [minibatch, 4+C, h, w]; if false, return
|
||||||
|
* NHWC format labels with array shape [minibatch, h, w, 4+C]
|
||||||
* @param labelProvider ImageObjectLabelProvider - used to look up which objects are in each image
|
* @param labelProvider ImageObjectLabelProvider - used to look up which objects are in each image
|
||||||
*/
|
*/
|
||||||
public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, ImageObjectLabelProvider labelProvider) {
|
public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, boolean nchw, ImageObjectLabelProvider labelProvider) {
|
||||||
super(height, width, channels, null, null);
|
super(height, width, channels, null, null);
|
||||||
this.gridW = gridW;
|
this.gridW = gridW;
|
||||||
this.gridH = gridH;
|
this.gridH = gridH;
|
||||||
|
this.nchw = nchw;
|
||||||
this.labelProvider = labelProvider;
|
this.labelProvider = labelProvider;
|
||||||
this.appendLabel = labelProvider != null;
|
this.appendLabel = labelProvider != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* When imageTransform != null, object is removed if new center is outside of transformed image bounds.
|
* As per {@link #ObjectDetectionRecordReader(int, int, int, int, int, boolean, ImageObjectLabelProvider, ImageTransform)}
|
||||||
*
|
* but hardcoded to NCHW format
|
||||||
* @param height Height of the output images
|
|
||||||
* @param width Width of the output images
|
|
||||||
* @param channels Number of channels for the output images
|
|
||||||
* @param gridH Grid/quantization size (along height dimension) - Y axis
|
|
||||||
* @param gridW Grid/quantization size (along height dimension) - X axis
|
|
||||||
* @param labelProvider ImageObjectLabelProvider - used to look up which objects are in each image
|
|
||||||
* @param imageTransform ImageTransform - used to transform image and coordinates
|
|
||||||
*/
|
*/
|
||||||
public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW,
|
public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW,
|
||||||
ImageObjectLabelProvider labelProvider, ImageTransform imageTransform) {
|
ImageObjectLabelProvider labelProvider, ImageTransform imageTransform) {
|
||||||
|
this(height, width, channels, gridH, gridW, true, labelProvider, imageTransform);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When imageTransform != null, object is removed if new center is outside of transformed image bounds.
|
||||||
|
*
|
||||||
|
* @param height Height of the output images
|
||||||
|
* @param width Width of the output images
|
||||||
|
* @param channels Number of channels for the output images
|
||||||
|
* @param gridH Grid/quantization size (along height dimension) - Y axis
|
||||||
|
* @param gridW Grid/quantization size (along height dimension) - X axis
|
||||||
|
* @param labelProvider ImageObjectLabelProvider - used to look up which objects are in each image
|
||||||
|
* @param nchw If true: return NCHW format labels with array shape [minibatch, 4+C, h, w]; if false, return
|
||||||
|
* NHWC format labels with array shape [minibatch, h, w, 4+C]
|
||||||
|
* @param imageTransform ImageTransform - used to transform image and coordinates
|
||||||
|
*/
|
||||||
|
public ObjectDetectionRecordReader(int height, int width, int channels, int gridH, int gridW, boolean nchw,
|
||||||
|
ImageObjectLabelProvider labelProvider, ImageTransform imageTransform) {
|
||||||
super(height, width, channels, null, null);
|
super(height, width, channels, null, null);
|
||||||
this.gridW = gridW;
|
this.gridW = gridW;
|
||||||
this.gridH = gridH;
|
this.gridH = gridH;
|
||||||
|
this.nchw = nchw;
|
||||||
this.labelProvider = labelProvider;
|
this.labelProvider = labelProvider;
|
||||||
this.appendLabel = labelProvider != null;
|
this.appendLabel = labelProvider != null;
|
||||||
this.imageTransform = imageTransform;
|
this.imageTransform = imageTransform;
|
||||||
|
@ -182,6 +207,10 @@ public class ObjectDetectionRecordReader extends BaseImageRecordReader {
|
||||||
exampleNum++;
|
exampleNum++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!nchw) {
|
||||||
|
outImg = outImg.permute(0, 2, 3, 1); //NCHW to NHWC
|
||||||
|
outLabel = outLabel.permute(0, 2, 3, 1);
|
||||||
|
}
|
||||||
return new NDArrayRecordBatch(Arrays.asList(outImg, outLabel));
|
return new NDArrayRecordBatch(Arrays.asList(outImg, outLabel));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -256,6 +285,8 @@ public class ObjectDetectionRecordReader extends BaseImageRecordReader {
|
||||||
imageLoader = new NativeImageLoader(height, width, channels, imageTransform);
|
imageLoader = new NativeImageLoader(height, width, channels, imageTransform);
|
||||||
}
|
}
|
||||||
Image image = this.imageLoader.asImageMatrix(dataInputStream);
|
Image image = this.imageLoader.asImageMatrix(dataInputStream);
|
||||||
|
if(!nchw)
|
||||||
|
image.setImage(image.getImage().permute(0,2,3,1));
|
||||||
Nd4j.getAffinityManager().ensureLocation(image.getImage(), AffinityManager.Location.DEVICE);
|
Nd4j.getAffinityManager().ensureLocation(image.getImage(), AffinityManager.Location.DEVICE);
|
||||||
|
|
||||||
List<Writable> ret = RecordConverter.toRecord(image.getImage());
|
List<Writable> ret = RecordConverter.toRecord(image.getImage());
|
||||||
|
@ -264,6 +295,8 @@ public class ObjectDetectionRecordReader extends BaseImageRecordReader {
|
||||||
int nClasses = labels.size();
|
int nClasses = labels.size();
|
||||||
INDArray outLabel = Nd4j.create(1, 4 + nClasses, gridH, gridW);
|
INDArray outLabel = Nd4j.create(1, 4 + nClasses, gridH, gridW);
|
||||||
label(image, imageObjectsForPath, outLabel, 0);
|
label(image, imageObjectsForPath, outLabel, 0);
|
||||||
|
if(!nchw)
|
||||||
|
outLabel = outLabel.permute(0,2,3,1); //NCHW to NHWC
|
||||||
ret.add(new NDArrayWritable(outLabel));
|
ret.add(new NDArrayWritable(outLabel));
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
@ -56,168 +56,179 @@ public class TestObjectDetectionRecordReader {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void test() throws Exception {
|
public void test() throws Exception {
|
||||||
ImageObjectLabelProvider lp = new TestImageObjectDetectionLabelProvider();
|
for(boolean nchw : new boolean[]{true, false}) {
|
||||||
|
ImageObjectLabelProvider lp = new TestImageObjectDetectionLabelProvider();
|
||||||
|
|
||||||
File f = testDir.newFolder();
|
File f = testDir.newFolder();
|
||||||
new ClassPathResource("datavec-data-image/objdetect/").copyDirectory(f);
|
new ClassPathResource("datavec-data-image/objdetect/").copyDirectory(f);
|
||||||
|
|
||||||
String path = new File(f, "000012.jpg").getParent();
|
String path = new File(f, "000012.jpg").getParent();
|
||||||
|
|
||||||
int h = 32;
|
int h = 32;
|
||||||
int w = 32;
|
int w = 32;
|
||||||
int c = 3;
|
int c = 3;
|
||||||
int gW = 13;
|
int gW = 13;
|
||||||
int gH = 10;
|
int gH = 10;
|
||||||
|
|
||||||
//Enforce consistent iteration order for tests
|
//Enforce consistent iteration order for tests
|
||||||
URI[] u = new FileSplit(new File(path)).locations();
|
URI[] u = new FileSplit(new File(path)).locations();
|
||||||
Arrays.sort(u);
|
Arrays.sort(u);
|
||||||
|
|
||||||
RecordReader rr = new ObjectDetectionRecordReader(h, w, c, gH, gW, lp);
|
RecordReader rr = new ObjectDetectionRecordReader(h, w, c, gH, gW, nchw, lp);
|
||||||
rr.initialize(new CollectionInputSplit(u));
|
rr.initialize(new CollectionInputSplit(u));
|
||||||
|
|
||||||
RecordReader imgRR = new ImageRecordReader(h, w, c);
|
RecordReader imgRR = new ImageRecordReader(h, w, c, nchw);
|
||||||
imgRR.initialize(new CollectionInputSplit(u));
|
imgRR.initialize(new CollectionInputSplit(u));
|
||||||
|
|
||||||
List<String> labels = rr.getLabels();
|
List<String> labels = rr.getLabels();
|
||||||
assertEquals(Arrays.asList("car", "cat"), labels);
|
assertEquals(Arrays.asList("car", "cat"), labels);
|
||||||
|
|
||||||
|
|
||||||
//000012.jpg - originally 500x333
|
//000012.jpg - originally 500x333
|
||||||
//000019.jpg - originally 500x375
|
//000019.jpg - originally 500x375
|
||||||
double[] origW = new double[]{500, 500};
|
double[] origW = new double[]{500, 500};
|
||||||
double[] origH = new double[]{333, 375};
|
double[] origH = new double[]{333, 375};
|
||||||
List<List<ImageObject>> l = Arrays.asList(
|
List<List<ImageObject>> l = Arrays.asList(
|
||||||
Collections.singletonList(new ImageObject(156, 97, 351, 270, "car")),
|
Collections.singletonList(new ImageObject(156, 97, 351, 270, "car")),
|
||||||
Arrays.asList(new ImageObject(11, 113, 266, 259, "cat"), new ImageObject(231, 88, 483, 256, "cat"))
|
Arrays.asList(new ImageObject(11, 113, 266, 259, "cat"), new ImageObject(231, 88, 483, 256, "cat"))
|
||||||
);
|
);
|
||||||
|
|
||||||
for (int idx = 0; idx < 2; idx++) {
|
for (int idx = 0; idx < 2; idx++) {
|
||||||
assertTrue(rr.hasNext());
|
assertTrue(rr.hasNext());
|
||||||
List<Writable> next = rr.next();
|
List<Writable> next = rr.next();
|
||||||
List<Writable> nextImgRR = imgRR.next();
|
List<Writable> nextImgRR = imgRR.next();
|
||||||
|
|
||||||
//Check features:
|
//Check features:
|
||||||
assertEquals(next.get(0), nextImgRR.get(0));
|
assertEquals(next.get(0), nextImgRR.get(0));
|
||||||
|
|
||||||
//Check labels
|
//Check labels
|
||||||
assertEquals(2, next.size());
|
assertEquals(2, next.size());
|
||||||
assertTrue(next.get(0) instanceof NDArrayWritable);
|
assertTrue(next.get(0) instanceof NDArrayWritable);
|
||||||
assertTrue(next.get(1) instanceof NDArrayWritable);
|
assertTrue(next.get(1) instanceof NDArrayWritable);
|
||||||
|
|
||||||
List<ImageObject> objects = l.get(idx);
|
List<ImageObject> objects = l.get(idx);
|
||||||
|
|
||||||
INDArray expLabels = Nd4j.create(1, 4 + 2, gH, gW);
|
INDArray expLabels = Nd4j.create(1, 4 + 2, gH, gW);
|
||||||
for (ImageObject io : objects) {
|
for (ImageObject io : objects) {
|
||||||
double fracImageX1 = io.getX1() / origW[idx];
|
double fracImageX1 = io.getX1() / origW[idx];
|
||||||
double fracImageY1 = io.getY1() / origH[idx];
|
double fracImageY1 = io.getY1() / origH[idx];
|
||||||
double fracImageX2 = io.getX2() / origW[idx];
|
double fracImageX2 = io.getX2() / origW[idx];
|
||||||
double fracImageY2 = io.getY2() / origH[idx];
|
double fracImageY2 = io.getY2() / origH[idx];
|
||||||
|
|
||||||
double x1C = (fracImageX1 + fracImageX2) / 2.0;
|
double x1C = (fracImageX1 + fracImageX2) / 2.0;
|
||||||
double y1C = (fracImageY1 + fracImageY2) / 2.0;
|
double y1C = (fracImageY1 + fracImageY2) / 2.0;
|
||||||
|
|
||||||
int labelGridX = (int) (x1C * gW);
|
int labelGridX = (int) (x1C * gW);
|
||||||
int labelGridY = (int) (y1C * gH);
|
int labelGridY = (int) (y1C * gH);
|
||||||
|
|
||||||
int labelIdx;
|
int labelIdx;
|
||||||
if (io.getLabel().equals("car")) {
|
if (io.getLabel().equals("car")) {
|
||||||
labelIdx = 4;
|
labelIdx = 4;
|
||||||
} else {
|
} else {
|
||||||
labelIdx = 5;
|
labelIdx = 5;
|
||||||
|
}
|
||||||
|
expLabels.putScalar(0, labelIdx, labelGridY, labelGridX, 1.0);
|
||||||
|
|
||||||
|
expLabels.putScalar(0, 0, labelGridY, labelGridX, fracImageX1 * gW);
|
||||||
|
expLabels.putScalar(0, 1, labelGridY, labelGridX, fracImageY1 * gH);
|
||||||
|
expLabels.putScalar(0, 2, labelGridY, labelGridX, fracImageX2 * gW);
|
||||||
|
expLabels.putScalar(0, 3, labelGridY, labelGridX, fracImageY2 * gH);
|
||||||
}
|
}
|
||||||
expLabels.putScalar(0, labelIdx, labelGridY, labelGridX, 1.0);
|
|
||||||
|
|
||||||
expLabels.putScalar(0, 0, labelGridY, labelGridX, fracImageX1 * gW);
|
INDArray lArr = ((NDArrayWritable) next.get(1)).get();
|
||||||
expLabels.putScalar(0, 1, labelGridY, labelGridX, fracImageY1 * gH);
|
if(nchw) {
|
||||||
expLabels.putScalar(0, 2, labelGridY, labelGridX, fracImageX2 * gW);
|
assertArrayEquals(new long[]{1, 4 + 2, gH, gW}, lArr.shape());
|
||||||
expLabels.putScalar(0, 3, labelGridY, labelGridX, fracImageY2 * gH);
|
} else {
|
||||||
|
assertArrayEquals(new long[]{1, gH, gW, 4 + 2}, lArr.shape());
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!nchw)
|
||||||
|
expLabels = expLabels.permute(0,2,3,1); //NCHW to NHWC
|
||||||
|
|
||||||
|
assertEquals(expLabels, lArr);
|
||||||
}
|
}
|
||||||
|
|
||||||
INDArray lArr = ((NDArrayWritable) next.get(1)).get();
|
rr.reset();
|
||||||
assertArrayEquals(new long[]{1, 4 + 2, gH, gW}, lArr.shape());
|
Record record = rr.nextRecord();
|
||||||
assertEquals(expLabels, lArr);
|
RecordMetaDataImageURI metadata = (RecordMetaDataImageURI) record.getMetaData();
|
||||||
}
|
assertEquals(new File(path, "000012.jpg"), new File(metadata.getURI()));
|
||||||
|
assertEquals(3, metadata.getOrigC());
|
||||||
|
assertEquals((int) origH[0], metadata.getOrigH());
|
||||||
|
assertEquals((int) origW[0], metadata.getOrigW());
|
||||||
|
|
||||||
rr.reset();
|
List<Record> out = new ArrayList<>();
|
||||||
Record record = rr.nextRecord();
|
List<RecordMetaData> meta = new ArrayList<>();
|
||||||
RecordMetaDataImageURI metadata = (RecordMetaDataImageURI)record.getMetaData();
|
out.add(record);
|
||||||
assertEquals(new File(path, "000012.jpg"), new File(metadata.getURI()));
|
meta.add(metadata);
|
||||||
assertEquals(3, metadata.getOrigC());
|
record = rr.nextRecord();
|
||||||
assertEquals((int)origH[0], metadata.getOrigH());
|
metadata = (RecordMetaDataImageURI) record.getMetaData();
|
||||||
assertEquals((int)origW[0], metadata.getOrigW());
|
out.add(record);
|
||||||
|
meta.add(metadata);
|
||||||
|
|
||||||
List<Record> out = new ArrayList<>();
|
List<Record> fromMeta = rr.loadFromMetaData(meta);
|
||||||
List<RecordMetaData> meta = new ArrayList<>();
|
assertEquals(out, fromMeta);
|
||||||
out.add(record);
|
|
||||||
meta.add(metadata);
|
|
||||||
record = rr.nextRecord();
|
|
||||||
metadata = (RecordMetaDataImageURI)record.getMetaData();
|
|
||||||
out.add(record);
|
|
||||||
meta.add(metadata);
|
|
||||||
|
|
||||||
List<Record> fromMeta = rr.loadFromMetaData(meta);
|
// make sure we don't lose objects just by explicitly resizing
|
||||||
assertEquals(out, fromMeta);
|
int i = 0;
|
||||||
|
int[] nonzeroCount = {5, 10};
|
||||||
|
|
||||||
// make sure we don't lose objects just by explicitly resizing
|
ImageTransform transform = new ResizeImageTransform(37, 42);
|
||||||
int i = 0;
|
RecordReader rrTransform = new ObjectDetectionRecordReader(42, 37, c, gH, gW, nchw, lp, transform);
|
||||||
int[] nonzeroCount = {5, 10};
|
rrTransform.initialize(new CollectionInputSplit(u));
|
||||||
|
i = 0;
|
||||||
|
while (rrTransform.hasNext()) {
|
||||||
|
List<Writable> next = rrTransform.next();
|
||||||
|
assertEquals(37, transform.getCurrentImage().getWidth());
|
||||||
|
assertEquals(42, transform.getCurrentImage().getHeight());
|
||||||
|
INDArray labelArray = ((NDArrayWritable) next.get(1)).get();
|
||||||
|
BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0));
|
||||||
|
assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0));
|
||||||
|
}
|
||||||
|
|
||||||
ImageTransform transform = new ResizeImageTransform(37, 42);
|
ImageTransform transform2 = new ResizeImageTransform(1024, 2048);
|
||||||
RecordReader rrTransform = new ObjectDetectionRecordReader(42, 37, c, gH, gW, lp, transform);
|
RecordReader rrTransform2 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, nchw, lp, transform2);
|
||||||
rrTransform.initialize(new CollectionInputSplit(u));
|
rrTransform2.initialize(new CollectionInputSplit(u));
|
||||||
i = 0;
|
i = 0;
|
||||||
while (rrTransform.hasNext()) {
|
while (rrTransform2.hasNext()) {
|
||||||
List<Writable> next = rrTransform.next();
|
List<Writable> next = rrTransform2.next();
|
||||||
assertEquals(37, transform.getCurrentImage().getWidth());
|
assertEquals(1024, transform2.getCurrentImage().getWidth());
|
||||||
assertEquals(42, transform.getCurrentImage().getHeight());
|
assertEquals(2048, transform2.getCurrentImage().getHeight());
|
||||||
INDArray labelArray = ((NDArrayWritable)next.get(1)).get();
|
INDArray labelArray = ((NDArrayWritable) next.get(1)).get();
|
||||||
BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0));
|
BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0));
|
||||||
assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0));
|
assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Make sure image flip does not break labels and are correct for new image size dimensions:
|
||||||
|
ImageTransform transform3 = new PipelineImageTransform(
|
||||||
|
new ResizeImageTransform(2048, 4096),
|
||||||
|
new FlipImageTransform(-1)
|
||||||
|
);
|
||||||
|
RecordReader rrTransform3 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, nchw, lp, transform3);
|
||||||
|
rrTransform3.initialize(new CollectionInputSplit(u));
|
||||||
|
i = 0;
|
||||||
|
while (rrTransform3.hasNext()) {
|
||||||
|
List<Writable> next = rrTransform3.next();
|
||||||
|
INDArray labelArray = ((NDArrayWritable) next.get(1)).get();
|
||||||
|
BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0));
|
||||||
|
assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
//Test that doing a downscale with the native image loader directly instead of a transform does not cause an exception:
|
||||||
|
ImageTransform transform4 = new FlipImageTransform(-1);
|
||||||
|
RecordReader rrTransform4 = new ObjectDetectionRecordReader(128, 128, c, gH, gW, nchw, lp, transform4);
|
||||||
|
rrTransform4.initialize(new CollectionInputSplit(u));
|
||||||
|
i = 0;
|
||||||
|
while (rrTransform4.hasNext()) {
|
||||||
|
List<Writable> next = rrTransform4.next();
|
||||||
|
|
||||||
|
assertEquals((int) origW[i], transform4.getCurrentImage().getWidth());
|
||||||
|
assertEquals((int) origH[i], transform4.getCurrentImage().getHeight());
|
||||||
|
|
||||||
|
INDArray labelArray = ((NDArrayWritable) next.get(1)).get();
|
||||||
|
BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0));
|
||||||
|
assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0));
|
||||||
|
}
|
||||||
|
|
||||||
ImageTransform transform2 = new ResizeImageTransform(1024, 2048);
|
|
||||||
RecordReader rrTransform2 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, lp, transform2);
|
|
||||||
rrTransform2.initialize(new CollectionInputSplit(u));
|
|
||||||
i = 0;
|
|
||||||
while (rrTransform2.hasNext()) {
|
|
||||||
List<Writable> next = rrTransform2.next();
|
|
||||||
assertEquals(1024, transform2.getCurrentImage().getWidth());
|
|
||||||
assertEquals(2048, transform2.getCurrentImage().getHeight());
|
|
||||||
INDArray labelArray = ((NDArrayWritable)next.get(1)).get();
|
|
||||||
BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0));
|
|
||||||
assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0));
|
|
||||||
}
|
|
||||||
|
|
||||||
//Make sure image flip does not break labels and are correct for new image size dimensions:
|
|
||||||
ImageTransform transform3 = new PipelineImageTransform(
|
|
||||||
new ResizeImageTransform(2048, 4096),
|
|
||||||
new FlipImageTransform(-1)
|
|
||||||
);
|
|
||||||
RecordReader rrTransform3 = new ObjectDetectionRecordReader(2048, 1024, c, gH, gW, lp, transform3);
|
|
||||||
rrTransform3.initialize(new CollectionInputSplit(u));
|
|
||||||
i = 0;
|
|
||||||
while (rrTransform3.hasNext()) {
|
|
||||||
List<Writable> next = rrTransform3.next();
|
|
||||||
INDArray labelArray = ((NDArrayWritable)next.get(1)).get();
|
|
||||||
BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0));
|
|
||||||
assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0));
|
|
||||||
}
|
|
||||||
|
|
||||||
//Test that doing a downscale with the native image loader directly instead of a transform does not cause an exception:
|
|
||||||
ImageTransform transform4 = new FlipImageTransform(-1);
|
|
||||||
RecordReader rrTransform4 = new ObjectDetectionRecordReader(128, 128, c, gH, gW, lp, transform4);
|
|
||||||
rrTransform4.initialize(new CollectionInputSplit(u));
|
|
||||||
i = 0;
|
|
||||||
while (rrTransform4.hasNext()) {
|
|
||||||
List<Writable> next = rrTransform4.next();
|
|
||||||
|
|
||||||
assertEquals((int) origW[i], transform4.getCurrentImage().getWidth());
|
|
||||||
assertEquals((int) origH[i], transform4.getCurrentImage().getHeight());
|
|
||||||
|
|
||||||
INDArray labelArray = ((NDArrayWritable)next.get(1)).get();
|
|
||||||
BooleanIndexing.replaceWhere(labelArray, 1, Conditions.notEquals(0));
|
|
||||||
assertEquals(nonzeroCount[i++], labelArray.sum().getInt(0));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,9 +24,7 @@ import org.datavec.image.recordreader.objdetect.impl.VocLabelProvider;
|
||||||
import org.deeplearning4j.BaseDL4JTest;
|
import org.deeplearning4j.BaseDL4JTest;
|
||||||
import org.deeplearning4j.TestUtils;
|
import org.deeplearning4j.TestUtils;
|
||||||
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
|
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
|
||||||
import org.deeplearning4j.nn.conf.ConvolutionMode;
|
import org.deeplearning4j.nn.conf.*;
|
||||||
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
|
|
||||||
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
|
|
||||||
import org.deeplearning4j.nn.conf.distribution.GaussianDistribution;
|
import org.deeplearning4j.nn.conf.distribution.GaussianDistribution;
|
||||||
import org.deeplearning4j.nn.conf.inputs.InputType;
|
import org.deeplearning4j.nn.conf.inputs.InputType;
|
||||||
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
|
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
|
||||||
|
@ -36,6 +34,8 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
|
||||||
import org.junit.Rule;
|
import org.junit.Rule;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.rules.TemporaryFolder;
|
import org.junit.rules.TemporaryFolder;
|
||||||
|
import org.junit.runner.RunWith;
|
||||||
|
import org.junit.runners.Parameterized;
|
||||||
import org.nd4j.linalg.activations.Activation;
|
import org.nd4j.linalg.activations.Activation;
|
||||||
import org.nd4j.linalg.api.buffer.DataType;
|
import org.nd4j.linalg.api.buffer.DataType;
|
||||||
import org.nd4j.linalg.api.ndarray.INDArray;
|
import org.nd4j.linalg.api.ndarray.INDArray;
|
||||||
|
@ -50,17 +50,28 @@ import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertArrayEquals;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author Alex Black
|
* @author Alex Black
|
||||||
*/
|
*/
|
||||||
|
@RunWith(Parameterized.class)
|
||||||
public class YoloGradientCheckTests extends BaseDL4JTest {
|
public class YoloGradientCheckTests extends BaseDL4JTest {
|
||||||
|
|
||||||
static {
|
static {
|
||||||
Nd4j.setDataType(DataType.DOUBLE);
|
Nd4j.setDataType(DataType.DOUBLE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private CNN2DFormat format;
|
||||||
|
public YoloGradientCheckTests(CNN2DFormat format){
|
||||||
|
this.format = format;
|
||||||
|
}
|
||||||
|
@Parameterized.Parameters(name = "{0}")
|
||||||
|
public static Object[] params(){
|
||||||
|
return CNN2DFormat.values();
|
||||||
|
}
|
||||||
|
|
||||||
@Rule
|
@Rule
|
||||||
public TemporaryFolder testDir = new TemporaryFolder();
|
public TemporaryFolder testDir = new TemporaryFolder();
|
||||||
|
|
||||||
|
@ -97,8 +108,14 @@ public class YoloGradientCheckTests extends BaseDL4JTest {
|
||||||
|
|
||||||
Nd4j.getRandom().setSeed(12345);
|
Nd4j.getRandom().setSeed(12345);
|
||||||
|
|
||||||
INDArray input = Nd4j.rand(new int[]{mb, depthIn, h, w});
|
INDArray input, labels;
|
||||||
INDArray labels = yoloLabels(mb, c, h, w);
|
if(format == CNN2DFormat.NCHW){
|
||||||
|
input = Nd4j.rand(DataType.DOUBLE, mb, depthIn, h, w);
|
||||||
|
labels = yoloLabels(mb, c, h, w);
|
||||||
|
} else {
|
||||||
|
input = Nd4j.rand(DataType.DOUBLE, mb, h, w, depthIn);
|
||||||
|
labels = yoloLabels(mb, c, h, w).permute(0,2,3,1);
|
||||||
|
}
|
||||||
|
|
||||||
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
|
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
|
||||||
.dataType(DataType.DOUBLE)
|
.dataType(DataType.DOUBLE)
|
||||||
|
@ -112,6 +129,7 @@ public class YoloGradientCheckTests extends BaseDL4JTest {
|
||||||
.layer(new Yolo2OutputLayer.Builder()
|
.layer(new Yolo2OutputLayer.Builder()
|
||||||
.boundingBoxPriors(bbPrior)
|
.boundingBoxPriors(bbPrior)
|
||||||
.build())
|
.build())
|
||||||
|
.setInputType(InputType.convolutional(h, w, depthIn, format))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
MultiLayerNetwork net = new MultiLayerNetwork(conf);
|
MultiLayerNetwork net = new MultiLayerNetwork(conf);
|
||||||
|
@ -120,7 +138,18 @@ public class YoloGradientCheckTests extends BaseDL4JTest {
|
||||||
String msg = "testYoloOutputLayer() - minibatch = " + mb + ", w=" + w + ", h=" + h + ", l1=" + l1[i] + ", l2=" + l2[i];
|
String msg = "testYoloOutputLayer() - minibatch = " + mb + ", w=" + w + ", h=" + h + ", l1=" + l1[i] + ", l2=" + l2[i];
|
||||||
System.out.println(msg);
|
System.out.println(msg);
|
||||||
|
|
||||||
|
INDArray out = net.output(input);
|
||||||
|
if(format == CNN2DFormat.NCHW){
|
||||||
|
assertArrayEquals(new long[]{mb, yoloDepth, h, w}, out.shape());
|
||||||
|
} else {
|
||||||
|
assertArrayEquals(new long[]{mb, h, w, yoloDepth}, out.shape());
|
||||||
|
}
|
||||||
|
|
||||||
|
net.fit(input, labels);
|
||||||
|
|
||||||
|
|
||||||
boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(input)
|
boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(input)
|
||||||
|
.minAbsoluteError(1e-6)
|
||||||
.labels(labels).subset(true).maxPerParam(100));
|
.labels(labels).subset(true).maxPerParam(100));
|
||||||
|
|
||||||
assertTrue(msg, gradOK);
|
assertTrue(msg, gradOK);
|
||||||
|
|
|
@ -21,6 +21,7 @@ import lombok.Getter;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
import org.deeplearning4j.nn.api.Layer;
|
import org.deeplearning4j.nn.api.Layer;
|
||||||
import org.deeplearning4j.nn.api.ParamInitializer;
|
import org.deeplearning4j.nn.api.ParamInitializer;
|
||||||
|
import org.deeplearning4j.nn.conf.CNN2DFormat;
|
||||||
import org.deeplearning4j.nn.conf.GradientNormalization;
|
import org.deeplearning4j.nn.conf.GradientNormalization;
|
||||||
import org.deeplearning4j.nn.conf.InputPreProcessor;
|
import org.deeplearning4j.nn.conf.InputPreProcessor;
|
||||||
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
|
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
|
||||||
|
@ -80,6 +81,8 @@ public class Yolo2OutputLayer extends org.deeplearning4j.nn.conf.layers.Layer {
|
||||||
@JsonDeserialize(using = BoundingBoxesDeserializer.class)
|
@JsonDeserialize(using = BoundingBoxesDeserializer.class)
|
||||||
private INDArray boundingBoxes;
|
private INDArray boundingBoxes;
|
||||||
|
|
||||||
|
private CNN2DFormat format = CNN2DFormat.NCHW; //Default for serialization of old formats
|
||||||
|
|
||||||
private Yolo2OutputLayer() {
|
private Yolo2OutputLayer() {
|
||||||
//No-arg constructor for Jackson JSON
|
//No-arg constructor for Jackson JSON
|
||||||
}
|
}
|
||||||
|
@ -119,7 +122,8 @@ public class Yolo2OutputLayer extends org.deeplearning4j.nn.conf.layers.Layer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setNIn(InputType inputType, boolean override) {
|
public void setNIn(InputType inputType, boolean override) {
|
||||||
//No op
|
InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
|
||||||
|
this.format = c.getFormat();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.deeplearning4j.nn.layers.objdetect;
|
||||||
import lombok.*;
|
import lombok.*;
|
||||||
import org.deeplearning4j.nn.api.Layer;
|
import org.deeplearning4j.nn.api.Layer;
|
||||||
import org.deeplearning4j.nn.api.layers.IOutputLayer;
|
import org.deeplearning4j.nn.api.layers.IOutputLayer;
|
||||||
|
import org.deeplearning4j.nn.conf.CNN2DFormat;
|
||||||
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
|
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
|
||||||
import org.deeplearning4j.nn.gradient.DefaultGradient;
|
import org.deeplearning4j.nn.gradient.DefaultGradient;
|
||||||
import org.deeplearning4j.nn.gradient.Gradient;
|
import org.deeplearning4j.nn.gradient.Gradient;
|
||||||
|
@ -110,6 +111,12 @@ public class Yolo2OutputLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
|
||||||
Preconditions.checkState(labels.rank() == 4, "Expected rank 4 labels array with shape [minibatch, 4+numClasses, h, w]" +
|
Preconditions.checkState(labels.rank() == 4, "Expected rank 4 labels array with shape [minibatch, 4+numClasses, h, w]" +
|
||||||
" but got rank %s labels array with shape %s", labels.rank(), labels.shape());
|
" but got rank %s labels array with shape %s", labels.rank(), labels.shape());
|
||||||
|
|
||||||
|
boolean nchw = layerConf().getFormat() == CNN2DFormat.NCHW;
|
||||||
|
INDArray input = nchw ? this.input : this.input.permute(0,3,1,2); //NHWC to NCHW
|
||||||
|
INDArray labels = this.labels.castTo(input.dataType()); //Ensure correct dtype (same as params); no-op if already correct dtype
|
||||||
|
if(!nchw)
|
||||||
|
labels = labels.permute(0,3,1,2); //NHWC to NCHW
|
||||||
|
|
||||||
double lambdaCoord = layerConf().getLambdaCoord();
|
double lambdaCoord = layerConf().getLambdaCoord();
|
||||||
double lambdaNoObj = layerConf().getLambdaNoObj();
|
double lambdaNoObj = layerConf().getLambdaNoObj();
|
||||||
|
|
||||||
|
@ -119,7 +126,7 @@ public class Yolo2OutputLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
|
||||||
int b = (int) layerConf().getBoundingBoxes().size(0);
|
int b = (int) layerConf().getBoundingBoxes().size(0);
|
||||||
int c = (int) labels.size(1)-4;
|
int c = (int) labels.size(1)-4;
|
||||||
|
|
||||||
INDArray labels = this.labels.castTo(input.dataType()); //Ensure correct dtype (same as params); no-op if already correct dtype
|
|
||||||
|
|
||||||
//Various shape arrays, to reuse
|
//Various shape arrays, to reuse
|
||||||
long[] nhw = new long[]{mb, h, w};
|
long[] nhw = new long[]{mb, h, w};
|
||||||
|
@ -380,13 +387,17 @@ public class Yolo2OutputLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
|
||||||
epsWH.addi(dLc_din_wh);
|
epsWH.addi(dLc_din_wh);
|
||||||
epsXY.addi(dLc_din_xy);
|
epsXY.addi(dLc_din_xy);
|
||||||
|
|
||||||
|
if(!nchw)
|
||||||
|
epsOut = epsOut.permute(0,2,3,1); //NCHW to NHWC
|
||||||
|
|
||||||
return epsOut;
|
return epsOut;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
|
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
|
||||||
assertInputSet(false);
|
assertInputSet(false);
|
||||||
return YoloUtils.activate(layerConf().getBoundingBoxes(), input, workspaceMgr);
|
boolean nchw = layerConf().getFormat() == CNN2DFormat.NCHW;
|
||||||
|
return YoloUtils.activate(layerConf().getBoundingBoxes(), input, nchw, workspaceMgr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -39,12 +39,23 @@ import static org.nd4j.linalg.indexing.NDArrayIndex.*;
|
||||||
*/
|
*/
|
||||||
public class YoloUtils {
|
public class YoloUtils {
|
||||||
|
|
||||||
/** Essentially: just apply activation functions... */
|
/** Essentially: just apply activation functions... For NCHW format. For NCHW format, use one of the other activate methods */
|
||||||
public static INDArray activate(INDArray boundingBoxPriors, INDArray input) {
|
public static INDArray activate(INDArray boundingBoxPriors, INDArray input) {
|
||||||
return activate(boundingBoxPriors, input, LayerWorkspaceMgr.noWorkspaces());
|
return activate(boundingBoxPriors, input, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static INDArray activate(@NonNull INDArray boundingBoxPriors, @NonNull INDArray input, LayerWorkspaceMgr layerWorkspaceMgr){
|
public static INDArray activate(INDArray boundingBoxPriors, INDArray input, boolean nchw) {
|
||||||
|
return activate(boundingBoxPriors, input, nchw, LayerWorkspaceMgr.noWorkspaces());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static INDArray activate(@NonNull INDArray boundingBoxPriors, @NonNull INDArray input, LayerWorkspaceMgr layerWorkspaceMgr) {
|
||||||
|
return activate(boundingBoxPriors, input, true, layerWorkspaceMgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static INDArray activate(@NonNull INDArray boundingBoxPriors, @NonNull INDArray input, boolean nchw, LayerWorkspaceMgr layerWorkspaceMgr){
|
||||||
|
if(!nchw)
|
||||||
|
input = input.permute(0,3,1,2); //NHWC to NCHW
|
||||||
|
|
||||||
long mb = input.size(0);
|
long mb = input.size(0);
|
||||||
long h = input.size(2);
|
long h = input.size(2);
|
||||||
long w = input.size(3);
|
long w = input.size(3);
|
||||||
|
@ -83,6 +94,9 @@ public class YoloUtils {
|
||||||
INDArray outputClasses = output5.get(all(), all(), interval(5, 5+c), all(), all()); //Shape: [minibatch, C, H, W]
|
INDArray outputClasses = output5.get(all(), all(), interval(5, 5+c), all(), all()); //Shape: [minibatch, C, H, W]
|
||||||
outputClasses.assign(postSoftmax5d);
|
outputClasses.assign(postSoftmax5d);
|
||||||
|
|
||||||
|
if(!nchw)
|
||||||
|
output = output.permute(0,2,3,1); //NCHW to NHWC
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue