/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/

package org.deeplearning4j.gradientcheck;

import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.dropout.IDropout;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.layers.convolution.ConvolutionHelper;
import org.deeplearning4j.nn.layers.convolution.CudnnConvolutionHelper;
import org.deeplearning4j.nn.layers.convolution.subsampling.CudnnSubsamplingHelper;
import org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingHelper;
import org.deeplearning4j.nn.layers.dropout.CudnnDropoutHelper;
import org.deeplearning4j.nn.layers.normalization.BatchNormalizationHelper;
import org.deeplearning4j.nn.layers.normalization.CudnnBatchNormalizationHelper;
import org.deeplearning4j.nn.layers.normalization.CudnnLocalResponseNormalizationHelper;
import org.deeplearning4j.nn.layers.normalization.LocalResponseNormalizationHelper;
import org.deeplearning4j.nn.layers.recurrent.CudnnLSTMHelper;
import org.deeplearning4j.nn.layers.recurrent.LSTMHelper;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.buffer.util.DataTypeUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.function.Consumer;
import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

/**
 * Gradient checks for DL4J layers backed by cuDNN helpers: convolution, subsampling, batch
 * normalization, local response normalization, LSTM and dropout. Each test verifies both that
 * the cuDNN helper is actually in use (via reflection) and that its gradients are numerically
 * correct.
 *
 * Created by Alex on 09/09/2016.
 */
@Slf4j
public class CuDNNGradientChecks extends BaseDL4JTest {
private static final boolean PRINT_RESULTS = true;
private static final boolean RETURN_ON_FIRST_FAILURE = false;
private static final double DEFAULT_EPS = 1e-5;
private static final double DEFAULT_MAX_REL_ERROR = 1e-2;
private static final double DEFAULT_MIN_ABS_ERROR = 1e-6;
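//Gradient checks require double precision: single precision numerical gradients are too
// inaccurate for the epsilon and error thresholds used here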
static {
DataTypeUtil.setDTypeForContext(DataType.DOUBLE);
}
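/**
 * Gradient check for a small CNN using the cuDNN convolution helper, covering combinations of
 * activation function, minibatch size, and whether some learning is done before the check
 * (the "characteristic mode of operation").
 */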
@Test
public void testConvolutional() throws Exception {
//Parameterized test, testing combinations of:
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
int[] minibatchSizes = {1, 4};
int width = 6;
int height = 6;
int inputDepth = 2;
int nOut = 3;
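//The cuDNN helper is a private field of the layer implementation; access it via reflection to
// verify that the cuDNN implementation (not the built-in one) is actually being used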
Field f = org.deeplearning4j.nn.layers.convolution.ConvolutionLayer.class.getDeclaredField("helper");
f.setAccessible(true);
Random r = new Random(12345);
for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(new int[] {minibatchSize, inputDepth, height, width});
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
.dist(new UniformDistribution(-1, 1))
.updater(new NoOp()).seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(1, 1).nOut(3)
.activation(afn).build())
.layer(1, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(0, 0).nOut(3)
.activation(afn).build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(height, width, inputDepth))
;
MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c0 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(0);
ConvolutionHelper ch0 = (ConvolutionHelper) f.get(c0);
assertTrue(ch0 instanceof CudnnConvolutionHelper);
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c1 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(1);
ConvolutionHelper ch1 = (ConvolutionHelper) f.get(c1);
assertTrue(ch1 instanceof CudnnConvolutionHelper);
//-------------------------------
//For debugging/comparison to no-cudnn case: set helper field to null
// f.set(c0, null);
// f.set(c1, null);
// assertNull(f.get(c0));
// assertNull(f.get(c1));
//-------------------------------
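//Get the name of the enclosing test method, for use in assertion messages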
String name = new Object() {}.getClass().getEnclosingMethod().getName();
if (doLearningFirst) {
//Run a number of iterations of learning
mln.setInput(input);
mln.setLabels(labels);
mln.computeGradientAndScore();
double scoreBefore = mln.score();
for (int j = 0; j < 10; j++)
mln.fit(input, labels);
mln.computeGradientAndScore();
double scoreAfter = mln.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = name + " - score did not (sufficiently) decrease during learning - activationFn="
+ afn + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
+ ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
}
if (PRINT_RESULTS) {
System.out.println(name + " - activationFn=" + afn + ", doLearningFirst=" + doLearningFirst);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
}
}
}
}
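/**
 * Gradient check for cuDNN convolution layers configured with and without bias parameters.
 */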
@Test
public void testConvolutionalNoBias() throws Exception {
int[] minibatchSizes = {1, 4};
int width = 6;
int height = 6;
int inputDepth = 2;
int nOut = 3;
Field f = org.deeplearning4j.nn.layers.convolution.ConvolutionLayer.class.getDeclaredField("helper");
f.setAccessible(true);
Random r = new Random(12345);
for (int minibatchSize : minibatchSizes) {
for (boolean convHasBias : new boolean[]{true, false}) {
INDArray input = Nd4j.rand(new int[]{minibatchSize, inputDepth, height, width});
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.dist(new UniformDistribution(-1, 1))
.updater(new NoOp()).seed(12345L)
.list()
.layer(0, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(1, 1).nOut(3)
.hasBias(convHasBias)
.activation(Activation.TANH).build())
.layer(1, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(0, 0).nOut(3)
.hasBias(convHasBias)
.activation(Activation.TANH).build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(height, width, inputDepth))
;
MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c0 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(0);
ConvolutionHelper ch0 = (ConvolutionHelper) f.get(c0);
assertTrue(ch0 instanceof CudnnConvolutionHelper);
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c1 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(1);
ConvolutionHelper ch1 = (ConvolutionHelper) f.get(c1);
assertTrue(ch1 instanceof CudnnConvolutionHelper);
String name = new Object() {}.getClass().getEnclosingMethod().getName() + ", minibatch = "
+ minibatchSize + ", convHasBias = " + convHasBias;
if (PRINT_RESULTS) {
System.out.println(name);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(name, gradOK);
}
}
}
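/**
 * Gradient check for batch normalization applied to CNN activations, using the cuDNN batch
 * normalization helper.
 */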
@Test
public void testBatchNormCnn() throws Exception {
//Note: cuDNN batch normalization supports 4d input only, as of cuDNN 5.1 (per the cuDNN API reference documentation)
Nd4j.getRandom().setSeed(12345);
int minibatch = 10;
int depth = 1;
int hw = 4;
int nOut = 4;
INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
INDArray labels = Nd4j.zeros(minibatch, nOut);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
.dataType(DataType.DOUBLE)
.seed(12345L)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
.activation(Activation.IDENTITY).build())
.layer(1, new BatchNormalization.Builder().build())
.layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(hw, hw, depth));
MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
mln.init();
Field f = org.deeplearning4j.nn.layers.normalization.BatchNormalization.class.getDeclaredField("helper");
f.setAccessible(true);
org.deeplearning4j.nn.layers.normalization.BatchNormalization b =
(org.deeplearning4j.nn.layers.normalization.BatchNormalization) mln.getLayer(1);
BatchNormalizationHelper bn = (BatchNormalizationHelper) f.get(b);
assertTrue(bn instanceof CudnnBatchNormalizationHelper);
//-------------------------------
//For debugging/comparison to no-cudnn case: set helper field to null
// f.set(b, null);
// assertNull(f.get(b));
//-------------------------------
if (PRINT_RESULTS) {
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
//Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
//i.e., runningMean = decay * runningMean + (1-decay) * batchMean
//However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "1_log10stdev"));
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, excludeParams);
assertTrue(gradOK);
}
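/**
 * Gradient check for local response normalization using the cuDNN LRN helper.
 */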
@Test
public void testLRN() throws Exception {
Nd4j.getRandom().setSeed(12345);
int minibatch = 10;
int depth = 6;
int hw = 5;
int nOut = 4;
INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
INDArray labels = Nd4j.zeros(minibatch, nOut);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
.dataType(DataType.DOUBLE)
.seed(12345L)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new ConvolutionLayer.Builder().nOut(6).kernelSize(2, 2).stride(1, 1)
.activation(Activation.TANH).build())
.layer(1, new LocalResponseNormalization.Builder().build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(hw, hw, depth));
MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
mln.init();
Field f = org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization.class
.getDeclaredField("helper");
f.setAccessible(true);
org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization l =
(org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization) mln.getLayer(1);
LocalResponseNormalizationHelper lrn = (LocalResponseNormalizationHelper) f.get(l);
assertTrue(lrn instanceof CudnnLocalResponseNormalizationHelper);
//-------------------------------
//For debugging/comparison to no-cudnn case: set helper field to null
// f.set(l, null);
// assertNull(f.get(l));
//-------------------------------
if (PRINT_RESULTS) {
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
}
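/**
 * Gradient check for stacked LSTM layers using the cuDNN LSTM helper. For speed, only a random
 * subset of parameters (up to 32 values per parameter) is checked.
 */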
@Test
public void testLSTM() throws Exception {
Nd4j.getRandom().setSeed(12345);
int minibatch = 4;
int inputSize = 3;
int lstmLayerSize = 4;
int timeSeriesLength = 3;
int nOut = 4;
INDArray input = Nd4j.rand(new int[] {minibatch, inputSize, timeSeriesLength});
INDArray labels = Nd4j.zeros(minibatch, nOut, timeSeriesLength);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
for (int j = 0; j < timeSeriesLength; j++) {
labels.putScalar(i, r.nextInt(nOut), j, 1.0);
}
}
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp()).seed(12345L)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize)
.gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
.layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
.gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
.layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(lstmLayerSize).nOut(nOut).build())
;
MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
mln.init();
Field f = org.deeplearning4j.nn.layers.recurrent.LSTM.class.getDeclaredField("helper");
f.setAccessible(true);
org.deeplearning4j.nn.layers.recurrent.LSTM l = (org.deeplearning4j.nn.layers.recurrent.LSTM) mln.getLayer(1);
LSTMHelper helper = (LSTMHelper) f.get(l);
assertTrue(helper instanceof CudnnLSTMHelper);
//-------------------------------
//For debugging/comparison to no-cudnn case: set helper field to null
// f.set(l, null);
// assertNull(f.get(l));
//-------------------------------
if (PRINT_RESULTS) {
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 32);
assertTrue(gradOK);
}
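/**
 * Gradient check for stacked LSTM layers using the cuDNN LSTM helper, with a larger minibatch
 * and all parameters checked.
 */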
@Test
public void testLSTM2() throws Exception {
Nd4j.getRandom().setSeed(12345);
int minibatch = 10;
int inputSize = 3;
int lstmLayerSize = 4;
int timeSeriesLength = 3;
int nOut = 2;
INDArray input = Nd4j.rand(new int[] {minibatch, inputSize, timeSeriesLength});
INDArray labels = Nd4j.zeros(minibatch, nOut, timeSeriesLength);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
for (int j = 0; j < timeSeriesLength; j++) {
labels.putScalar(i, r.nextInt(nOut), j, 1.0);
}
}
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp()).seed(12345L)
.dist(new NormalDistribution(0, 2)).list()
.layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize)
.gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
.layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
.gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build())
.layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(lstmLayerSize).nOut(nOut).build())
;
MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
mln.init();
Field f = org.deeplearning4j.nn.layers.recurrent.LSTM.class.getDeclaredField("helper");
f.setAccessible(true);
org.deeplearning4j.nn.layers.recurrent.LSTM l = (org.deeplearning4j.nn.layers.recurrent.LSTM) mln.getLayer(1);
LSTMHelper helper = (LSTMHelper) f.get(l);
assertTrue(helper instanceof CudnnLSTMHelper);
//-------------------------------
//For debugging/comparison to no-cudnn case: set helper field to null
// f.set(l, null);
// assertNull(f.get(l));
//-------------------------------
if (PRINT_RESULTS) {
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
}
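/**
 * Gradient check for dilated convolution and dilated max-pooling subsampling layers, across
 * combinations of kernel size, stride, dilation and convolution mode.
 */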
@Test
public void testCnnDilated() throws Exception {
int nOut = 2;
int minibatchSize = 3;
int width = 8;
int height = 8;
int inputDepth = 3;
Nd4j.getRandom().setSeed(12345);
Field f = org.deeplearning4j.nn.layers.convolution.ConvolutionLayer.class.getDeclaredField("helper");
f.setAccessible(true);
Field f2 = org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer.class.getDeclaredField("helper");
f2.setAccessible(true);
int[] kernelSizes = new int[]{2, 3, 2};
int[] strides = {1, 2, 2};
int[] dilation = {2, 3, 2};
ConvolutionMode[] cModes = new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same, ConvolutionMode.Truncate};
for (boolean subsampling : new boolean[]{false, true}) {
for (int t = 0; t < kernelSizes.length; t++) {
int k = kernelSizes[t];
int s = strides[t];
int d = dilation[t];
ConvolutionMode cm = cModes[t];
//Use larger input with larger dilation values (to avoid invalid config)
int w = d * width;
int h = d * height;
INDArray input = Nd4j.rand(minibatchSize, w * h * inputDepth);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
}
NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH).convolutionMode(cm).list()
.layer(new ConvolutionLayer.Builder().name("layer 0")
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.nIn(inputDepth).nOut(2).build());
if (subsampling) {
b.layer(new SubsamplingLayer.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX)
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.build());
} else {
b.layer(new ConvolutionLayer.Builder().nIn(2).nOut(2)
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.build());
}
MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(h, w, inputDepth)).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c0 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) net.getLayer(0);
ConvolutionHelper ch0 = (ConvolutionHelper) f.get(c0);
assertTrue(ch0 instanceof CudnnConvolutionHelper);
if (subsampling) {
org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer s1 =
(org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer) net.getLayer(1);
SubsamplingHelper sh1 = (SubsamplingHelper) f2.get(s1);
assertTrue(sh1 instanceof CudnnSubsamplingHelper);
} else {
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c1 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) net.getLayer(1);
ConvolutionHelper ch1 = (ConvolutionHelper) f.get(c1);
assertTrue(ch1 instanceof CudnnConvolutionHelper);
}
for (int i = 0; i < net.getLayers().length; i++) {
System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams());
}
String msg = (subsampling ? "subsampling" : "conv") + " - mb=" + minibatchSize + ", k="
+ k + ", s=" + s + ", d=" + d + ", cm=" + cm;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
}
}
}
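/**
 * Gradient check for cuDNN dropout in both dense and convolutional networks. The dropout mask
 * and RNG state are reset before each pass so that the stochastic forward pass is repeatable.
 */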
@Test
public void testDropout() {
int minibatch = 2;
for (boolean cnn : new boolean[]{false, true}) {
Nd4j.getRandom().setSeed(12345);
IDropout dropout = new Dropout(0.6);
NeuralNetConfiguration.ListBuilder builder = new NeuralNetConfiguration.Builder()
.seed(12345)
.dataType(DataType.DOUBLE)
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.dropOut(dropout)
.activation(Activation.TANH)
.updater(new NoOp())
.list();
if (cnn) {
builder.layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(2, 2).nOut(2).build());
builder.layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(2, 2).nOut(2).build());
builder.setInputType(InputType.convolutional(8, 8, 2));
} else {
builder.layer(new DenseLayer.Builder().nOut(8).build());
builder.layer(new DenseLayer.Builder().nOut(8).build());
builder.setInputType(InputType.feedForward(6));
}
builder.layer(new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build());
MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
INDArray f;
if (cnn) {
f = Nd4j.rand(new int[]{minibatch, 2, 8, 8}).muli(10).subi(5);
} else {
f = Nd4j.rand(minibatch, 6).muli(10).subi(5);
}
INDArray l = TestUtils.randomOneHot(minibatch, 3);
mln.output(f, true);
for (Layer layer : mln.getLayers()) {
Dropout d = (Dropout) layer.conf().getLayer().getIDropout();
assertNotNull(d);
CudnnDropoutHelper h = (CudnnDropoutHelper) d.getHelper();
assertNotNull(h);
}
String msg = (cnn ? "CNN" : "Dense") + ": " + dropout.getClass().getSimpleName();
//Consumer function to enforce cuDNN RNG repeatability - otherwise the gradient check will fail
// due to randomness (inconsistent dropout masks between the two forward passes)
Consumer<MultiLayerNetwork> c = new Consumer<MultiLayerNetwork>() {
@Override
public void accept(MultiLayerNetwork net) {
Nd4j.getRandom().setSeed(12345);
for(Layer l : net.getLayers()){
Dropout d = (Dropout) l.conf().getLayer().getIDropout();
if(d != null){
((CudnnDropoutHelper)d.getHelper()).setMask(null);
((CudnnDropoutHelper)d.getHelper()).setRngStates(null);
}
}
}
};
log.info("*** Starting test: " + msg + " ***");
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, f, l, null, null,
false, -1, null, c);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(mln);
}
}
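/**
 * Gradient check for batch normalization on dense layer activations, excluding the running
 * mean/variance parameters (which are not gradient checkable).
 */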
@Test
public void testDenseBatchNorm(){
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.seed(12345)
.weightInit(WeightInit.XAVIER)
.updater(new NoOp())
.list()
.layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
.layer(new BatchNormalization.Builder().nOut(5).build())
.layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build())
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray in = Nd4j.rand(3, 5);
INDArray labels = TestUtils.randomOneHot(3, 5);
//Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
//i.e., runningMean = decay * runningMean + (1-decay) * batchMean
//However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "1_log10stdev"));
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, excludeParams);
assertTrue(gradOK);
TestUtils.testModelSerialization(net);
}
}