Small fixes to subsampling layer (#158)

Signed-off-by: AlexDBlack <blacka101@gmail.com>
Alex Black, 2019-08-23 22:50:07 +10:00 (committed by GitHub)
Branch: master
Commit: 8e3d569f18 (parent: 614c687e4b)
3 changed files with 59 additions and 47 deletions

LayerHelperValidationUtil.java

@@ -190,7 +190,7 @@ public class LayerHelperValidationUtil {
                 } else {
                     System.out.println("OK: " + p);
                 }
-                assertTrue("Gradients are not equal: " + p + " - highest relative error = " + maxRE + " > max relative error = " + t.getMaxRelError(),
+                assertTrue(t.getTestName() + " - Gradients are not equal: " + p + " - highest relative error = " + maxRE + " > max relative error = " + t.getMaxRelError(),
                         maxRE < t.getMaxRelError());
             }
         }
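Note: the only change here is that the assertion message is now prefixed with t.getTestName(), so a failing gradient comparison immediately identifies which configuration in ValidateMKLDNN produced it. A minimal sketch of how the pieces fit together, assuming TestCase is a builder-style class whose testName and maxRelError fields back the getters used above (testName and getMaxRelError appear in this diff; the maxRelError setter name and all values are illustrative assumptions):

// Illustrative fragment only; values below would normally come from LayerHelperValidationUtil.validateMLN(...).
String p = "0_W";      // parameter whose helper vs. no-helper gradients were compared
double maxRE = 2e-3;   // worst relative error observed for that parameter

LayerHelperValidationUtil.TestCase t = LayerHelperValidationUtil.TestCase.builder()
        .testName("AVG, mb=3, cm=Same, kernel=[2, 2], stride=[1, 1]") // same format as the name built in ValidateMKLDNN below
        .maxRelError(1e-4)                                            // assumed builder setter backing getMaxRelError()
        .build();

// JUnit 4's assertTrue(String message, boolean condition); a failure now reads, for example:
// "AVG, mb=3, cm=Same, kernel=[2, 2], stride=[1, 1] - Gradients are not equal: 0_W
//  - highest relative error = 0.002 > max relative error = 1.0E-4"
assertTrue(t.getTestName() + " - Gradients are not equal: " + p + " - highest relative error = " + maxRE
                + " > max relative error = " + t.getMaxRelError(),
        maxRE < t.getMaxRelError());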

ValidateMKLDNN.java

@@ -57,56 +57,62 @@ public class ValidateMKLDNN extends BaseDL4JTest {
         for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Same, ConvolutionMode.Truncate}) {
             for (int[] kernel : new int[][]{{2, 2}, {2, 3}}) {
                 for (int[] stride : new int[][]{{1, 1}, {2, 2}}) {
+                    for (PoolingType pt : new PoolingType[]{PoolingType.MAX, PoolingType.AVG}) {
                     inputSize[0] = minibatch;
                     INDArray f = Nd4j.rand(DataType.FLOAT, inputSize);
                     INDArray l = TestUtils.randomOneHot(minibatch, 10).castTo(DataType.FLOAT);

                     MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                             .updater(new Adam(0.01))
                             .convolutionMode(cm)
                             .seed(12345)
                             .list()
                             .layer(new ConvolutionLayer.Builder().activation(Activation.TANH)
                                     .kernelSize(kernel)
                                     .stride(stride)
-                                    .padding(0,0)
+                                    .padding(0, 0)
                                     .nOut(3)
                                     .build())
                             .layer(new SubsamplingLayer.Builder()
+                                    .poolingType(pt)
                                     .kernelSize(kernel)
                                     .stride(stride)
-                                    .padding(0,0)
+                                    .padding(0, 0)
                                     .build())
                             .layer(new ConvolutionLayer.Builder().activation(Activation.TANH)
                                     .kernelSize(kernel)
                                     .stride(stride)
-                                    .padding(0,0)
+                                    .padding(0, 0)
                                     .nOut(3)
                                     .build())
                             .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build())
                             .setInputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1]))
                             .build();

                     MultiLayerNetwork netWith = new MultiLayerNetwork(conf.clone());
                     netWith.init();

                     MultiLayerNetwork netWithout = new MultiLayerNetwork(conf.clone());
                     netWithout.init();

+                    String name = pt + ", mb=" + minibatch + ", cm=" + cm + ", kernel=" + Arrays.toString(kernel) + ", stride=" + Arrays.toString(stride);
                     LayerHelperValidationUtil.TestCase tc = LayerHelperValidationUtil.TestCase.builder()
+                            .testName(name)
                             .allowHelpersForClasses(Arrays.<Class<?>>asList(org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer.class,
                                     org.deeplearning4j.nn.layers.convolution.ConvolutionLayer.class))
                             .testForward(true)
                             .testScore(true)
                             .testBackward(true)
                             .testTraining(true)
                             .features(f)
                             .labels(l)
-                            .data(new SingletonDataSetIterator(new DataSet(f,l)))
+                            .data(new SingletonDataSetIterator(new DataSet(f, l)))
                             .build();

+                    System.out.println("Starting test: " + name);
                     LayerHelperValidationUtil.validateMLN(netWith, tc);
+                    }
                 }
             }
         }
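Note: with the added PoolingType loop, each minibatch value is now exercised across 2 convolution modes x 2 kernels x 2 strides x 2 pooling types = 16 configurations, and each configuration gets a human-readable name used both for the "Starting test" log line and for the assertion messages via .testName(name). A standalone sketch of that matrix, with no DL4J dependency; the minibatch value is only an example, since the enclosing minibatch loop lies outside this hunk:

// Standalone sketch of the configuration matrix the updated test covers.
import java.util.Arrays;

public class PoolingTestMatrix {
    public static void main(String[] args) {
        int minibatch = 3;                          // example value; the real loop over minibatch sizes is not shown in this hunk
        String[] convModes = {"Same", "Truncate"};
        int[][] kernels = {{2, 2}, {2, 3}};
        int[][] strides = {{1, 1}, {2, 2}};
        String[] poolingTypes = {"MAX", "AVG"};

        int count = 0;
        for (String cm : convModes)
            for (int[] kernel : kernels)
                for (int[] stride : strides)
                    for (String pt : poolingTypes) {
                        String name = pt + ", mb=" + minibatch + ", cm=" + cm
                                + ", kernel=" + Arrays.toString(kernel)
                                + ", stride=" + Arrays.toString(stride);
                        System.out.println("Starting test: " + name);
                        count++;
                    }
        System.out.println(count + " configurations"); // prints 16
    }
}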

SubsamplingLayer.java

@@ -289,8 +289,14 @@ public class SubsamplingLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
                 b = DynamicCustomOp.builder("maxpool2d");
                 break;
             case AVG:
-                b = DynamicCustomOp.builder("maxpool2d");
-                extra = 1; //Divide by kH*kW not "number present" to match backward pass -- TODO change this to support both legacy behaviour (deserialized nets) and "exclude" by default for new nets
+                b = DynamicCustomOp.builder("avgpool2d");
+                if(layerConf().isAvgPoolIncludePadInDivisor()){
+                    //Mostly this is a legacy case - beta4 and earlier models.
+                    extra = 1; //Divide by "number present" excluding padding
+                } else {
+                    //Default behaviour
+                    extra = 0; //Divide by kH*kW not "number present"
+                }
                 break;
             case PNORM:
                 b = DynamicCustomOp.builder("pnormpool2d");