First round of runtime test improvements (#7875)

* Capsnet test runtime improvements

* Slow test speedups

* Next round of test speed improvements

* More test improvements

* Improve test speed

* Next round of test speedups

* Another round

* More test speedups

* Another round

* Another round of test speedups

* Another round of speedups...

* CuDNN test speedups + more tests extending BaseDL4JTest

* Minor fix + more BaseDL4JTest use in other modules
Branch: master
Author: Alex Black, 2019-06-13 20:40:40 +10:00 (committed by GitHub)
Parent: b5f0ec072f
Commit: 32e5cc1945
188 changed files with 2558 additions and 1531 deletions

File: AttentionLayerTest.java (org.deeplearning4j.gradientcheck)

@@ -17,12 +17,14 @@
 package org.deeplearning4j.gradientcheck;
 import org.deeplearning4j.BaseDL4JTest;
+import org.deeplearning4j.TestUtils;
 import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
 import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.graph.AttentionVertex;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.*;
+import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
 import org.deeplearning4j.nn.graph.ComputationGraph;
 import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
 import org.deeplearning4j.nn.weights.WeightInit;
@@ -44,7 +46,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
     @Rule
     public ExpectedException exceptionRule = ExpectedException.none();
-    private static final boolean PRINT_RESULTS = false;
+    private static final boolean PRINT_RESULTS = true;
     private static final boolean RETURN_ON_FIRST_FAILURE = false;
     private static final double DEFAULT_EPS = 1e-6;
     private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
@@ -53,19 +55,15 @@ public class AttentionLayerTest extends BaseDL4JTest {
     @Test
     public void testSelfAttentionLayer() {
         int nIn = 3;
-        int nOut = 5;
+        int nOut = 2;
         int tsLength = 4;
-        int layerSize = 8;
-        Random r = new Random(12345);
-        for (int mb : new int[]{1, 2, 3}) {
+        int layerSize = 4;
+        for (int mb : new int[]{1, 3}) {
             for (boolean inputMask : new boolean[]{false, true}) {
                 for (boolean projectInput : new boolean[]{false, true}) {
-                    INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
-                    INDArray labels = Nd4j.create(mb, nOut);
-                    for (int i = 0; i < mb; i++) {
-                        labels.putScalar(i, r.nextInt(nOut), 1.0);
-                    }
+                    INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{mb, nIn, tsLength});
+                    INDArray labels = TestUtils.randomOneHot(mb, nOut);
                     String maskType = (inputMask ? "inputMask" : "none");
                     INDArray inMask = null;
@@ -94,7 +92,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
                             .list()
                             .layer(new LSTM.Builder().nOut(layerSize).build())
                             .layer( projectInput ?
-                                    new SelfAttentionLayer.Builder().nOut(8).nHeads(2).projectInput(true).build()
+                                    new SelfAttentionLayer.Builder().nOut(4).nHeads(2).projectInput(true).build()
                                     : new SelfAttentionLayer.Builder().nHeads(1).projectInput(false).build()
                             )
                             .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build())
@@ -107,7 +105,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
                     net.init();
                     boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
-                            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null);
+                            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, true, 100);
                     assertTrue(name, gradOK);
                 }
             }
@@ -117,20 +115,16 @@ public class AttentionLayerTest extends BaseDL4JTest {
     @Test
     public void testLearnedSelfAttentionLayer() {
         int nIn = 3;
-        int nOut = 5;
+        int nOut = 2;
         int tsLength = 4;
-        int layerSize = 8;
-        int numQueries = 6;
-        Random r = new Random(12345);
+        int layerSize = 4;
+        int numQueries = 3;
         for (boolean inputMask : new boolean[]{false, true}) {
-            for (int mb : new int[]{3, 2, 1}) {
+            for (int mb : new int[]{3, 1}) {
                 for (boolean projectInput : new boolean[]{false, true}) {
-                    INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
-                    INDArray labels = Nd4j.create(mb, nOut);
-                    for (int i = 0; i < mb; i++) {
-                        labels.putScalar(i, r.nextInt(nOut), 1.0);
-                    }
+                    INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{mb, nIn, tsLength});
+                    INDArray labels = TestUtils.randomOneHot(mb, nOut);
                     String maskType = (inputMask ? "inputMask" : "none");
                     INDArray inMask = null;
@@ -159,7 +153,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
                             .list()
                             .layer(new LSTM.Builder().nOut(layerSize).build())
                             .layer( projectInput ?
-                                    new LearnedSelfAttentionLayer.Builder().nOut(8).nHeads(2).nQueries(numQueries).projectInput(true).build()
+                                    new LearnedSelfAttentionLayer.Builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build()
                                     : new LearnedSelfAttentionLayer.Builder().nHeads(1).nQueries(numQueries).projectInput(false).build()
                             )
                             .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build())
@@ -172,7 +166,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
                     net.init();
                     boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
-                            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null);
+                            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, true, 100);
                     assertTrue(name, gradOK);
                 }
             }
@@ -182,10 +176,10 @@ public class AttentionLayerTest extends BaseDL4JTest {
     @Test
     public void testLearnedSelfAttentionLayer_differentMiniBatchSizes() {
         int nIn = 3;
-        int nOut = 5;
+        int nOut = 2;
         int tsLength = 4;
-        int layerSize = 8;
-        int numQueries = 6;
+        int layerSize = 4;
+        int numQueries = 3;
         Random r = new Random(12345);
         for (boolean inputMask : new boolean[]{false, true}) {
@@ -199,7 +193,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
                 .list()
                 .layer(new LSTM.Builder().nOut(layerSize).build())
                 .layer( projectInput ?
-                        new LearnedSelfAttentionLayer.Builder().nOut(8).nHeads(2).nQueries(numQueries).projectInput(true).build()
+                        new LearnedSelfAttentionLayer.Builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build()
                         : new LearnedSelfAttentionLayer.Builder().nHeads(1).nQueries(numQueries).projectInput(false).build()
                 )
                 .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build())
@@ -210,17 +204,14 @@ public class AttentionLayerTest extends BaseDL4JTest {
         MultiLayerNetwork net = new MultiLayerNetwork(conf);
         net.init();
-        for (int mb : new int[]{3, 2, 1}) {
-            INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
-            INDArray labels = Nd4j.create(mb, nOut);
-            for (int i = 0; i < mb; i++) {
-                labels.putScalar(i, r.nextInt(nOut), 1.0);
-            }
+        for (int mb : new int[]{3, 1}) {
+            INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{mb, nIn, tsLength});
+            INDArray labels = TestUtils.randomOneHot(mb, nOut);
             String maskType = (inputMask ? "inputMask" : "none");
             INDArray inMask = null;
             if (inputMask) {
-                inMask = Nd4j.ones(mb, tsLength);
+                inMask = Nd4j.ones(DataType.INT, mb, tsLength);
                 for (int i = 0; i < mb; i++) {
                     int firstMaskedStep = tsLength - 1 - i;
                     if (firstMaskedStep == 0) {
@@ -236,7 +227,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
                 System.out.println("Starting test: " + name);
                 boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
-                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null);
+                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, true, 100);
                 assertTrue(name, gradOK);
             }
         }
@@ -282,20 +273,15 @@ public class AttentionLayerTest extends BaseDL4JTest {
     @Test
     public void testRecurrentAttentionLayer() {
-        int nIn = 9;
-        int nOut = 5;
-        int tsLength = 4;
-        int layerSize = 8;
-        Random r = new Random(12345);
-        for (int mb : new int[]{3, 2, 1}) {
+        int nIn = 4;
+        int nOut = 2;
+        int tsLength = 3;
+        int layerSize = 3;
+        for (int mb : new int[]{3, 1}) {
             for (boolean inputMask : new boolean[]{true, false}) {
-                INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
-                INDArray labels = Nd4j.create(mb, nOut);
-                for (int i = 0; i < mb; i++) {
-                    labels.putScalar(i, r.nextInt(nOut), 1.0);
-                }
+                INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{mb, nIn, tsLength});
+                INDArray labels = TestUtils.randomOneHot(mb, nOut);
                 String maskType = (inputMask ? "inputMask" : "none");
                 INDArray inMask = null;
@@ -335,8 +321,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
                 //System.out.println("Original");
                 boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
-                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, false, -1, null
-                );
+                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, true, 100, null);
                 assertTrue(name, gradOK);
             }
         }
@@ -345,19 +330,16 @@ public class AttentionLayerTest extends BaseDL4JTest {
     @Test
     public void testAttentionVertex() {
         int nIn = 3;
-        int nOut = 5;
-        int tsLength = 4;
-        int layerSize = 8;
+        int nOut = 2;
+        int tsLength = 3;
+        int layerSize = 3;
         Random r = new Random(12345);
         for (boolean inputMask : new boolean[]{false, true}) {
-            for (int mb : new int[]{3, 2, 1}) {
+            for (int mb : new int[]{3, 1}) {
                 for (boolean projectInput : new boolean[]{false, true}) {
-                    INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
-                    INDArray labels = Nd4j.create(mb, nOut);
-                    for (int i = 0; i < mb; i++) {
-                        labels.putScalar(i, r.nextInt(nOut), 1.0);
-                    }
+                    INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{mb, nIn, tsLength});
+                    INDArray labels = TestUtils.randomOneHot(mb, nOut);
                     String maskType = (inputMask ? "inputMask" : "none");
                     INDArray inMask = null;
@@ -385,13 +367,13 @@ public class AttentionLayerTest extends BaseDL4JTest {
                             .weightInit(WeightInit.XAVIER)
                             .graphBuilder()
                             .addInputs("input")
-                            .addLayer("lstmKeys", new LSTM.Builder().nOut(layerSize).build(), "input")
-                            .addLayer("lstmQueries", new LSTM.Builder().nOut(layerSize).build(), "input")
-                            .addLayer("lstmValues", new LSTM.Builder().nOut(layerSize).build(), "input")
+                            .addLayer("rnnKeys", new SimpleRnn.Builder().nOut(layerSize).build(), "input")
+                            .addLayer("rnnQueries", new SimpleRnn.Builder().nOut(layerSize).build(), "input")
+                            .addLayer("rnnValues", new SimpleRnn.Builder().nOut(layerSize).build(), "input")
                             .addVertex("attention",
                                     projectInput ?
-                                            new AttentionVertex.Builder().nOut(8).nHeads(2).projectInput(true).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build()
-                                            : new AttentionVertex.Builder().nOut(8).nHeads(1).projectInput(false).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build(), "lstmQueries", "lstmKeys", "lstmValues")
+                                            new AttentionVertex.Builder().nOut(4).nHeads(2).projectInput(true).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build()
+                                            : new AttentionVertex.Builder().nOut(3).nHeads(1).projectInput(false).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build(), "rnnQueries", "rnnKeys", "rnnValues")
                             .addLayer("pooling", new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build(), "attention")
                             .addLayer("output", new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "pooling")
                             .setOutputs("output")
@@ -412,19 +394,16 @@ public class AttentionLayerTest extends BaseDL4JTest {
     @Test
     public void testAttentionVertexSameInput() {
         int nIn = 3;
-        int nOut = 5;
+        int nOut = 2;
         int tsLength = 4;
-        int layerSize = 8;
+        int layerSize = 4;
         Random r = new Random(12345);
         for (boolean inputMask : new boolean[]{false, true}) {
-            for (int mb : new int[]{3, 2, 1}) {
+            for (int mb : new int[]{3, 1}) {
                 for (boolean projectInput : new boolean[]{false, true}) {
                     INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
-                    INDArray labels = Nd4j.create(mb, nOut);
-                    for (int i = 0; i < mb; i++) {
-                        labels.putScalar(i, r.nextInt(nOut), 1.0);
-                    }
+                    INDArray labels = TestUtils.randomOneHot(mb, nOut);
                     String maskType = (inputMask ? "inputMask" : "none");
                     INDArray inMask = null;
@@ -452,11 +431,11 @@ public class AttentionLayerTest extends BaseDL4JTest {
                             .weightInit(WeightInit.XAVIER)
                             .graphBuilder()
                             .addInputs("input")
-                            .addLayer("lstm", new LSTM.Builder().nOut(layerSize).build(), "input")
+                            .addLayer("rnn", new SimpleRnn.Builder().activation(Activation.TANH).nOut(layerSize).build(), "input")
                             .addVertex("attention",
                                     projectInput ?
-                                            new AttentionVertex.Builder().nOut(8).nHeads(2).projectInput(true).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build()
-                                            : new AttentionVertex.Builder().nOut(8).nHeads(1).projectInput(false).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build(), "lstm", "lstm", "lstm")
+                                            new AttentionVertex.Builder().nOut(4).nHeads(2).projectInput(true).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build()
+                                            : new AttentionVertex.Builder().nOut(4).nHeads(1).projectInput(false).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build(), "rnn", "rnn", "rnn")
                             .addLayer("pooling", new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build(), "attention")
                             .addLayer("output", new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "pooling")
                             .setOutputs("output")
@@ -467,7 +446,8 @@ public class AttentionLayerTest extends BaseDL4JTest {
                     net.init();
                     boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
-                            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[]{in}, new INDArray[]{labels}, inMask != null ? new INDArray[]{inMask} : null, null);
+                            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[]{in},
+                            new INDArray[]{labels}, inMask != null ? new INDArray[]{inMask} : null, null);
                     assertTrue(name, gradOK);
                 }
             }

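Note on the recurring change in the hunks above: every hand-rolled "pick a random class per row" label loop is replaced by a call to TestUtils.randomOneHot. Below is a minimal sketch of what such a helper does; it simply mirrors the loops that were deleted, and the actual org.deeplearning4j.TestUtils implementation may differ in signature, seeding, and data-type handling (the class name OneHotLabelsSketch is illustrative only).

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import java.util.Random;

public class OneHotLabelsSketch {
    // Assumed equivalent of the removed loops: exactly one randomly chosen active class per row.
    public static INDArray randomOneHot(int examples, int nOut, long seed) {
        Random r = new Random(seed);
        INDArray labels = Nd4j.zeros(examples, nOut);
        for (int i = 0; i < examples; i++) {
            labels.putScalar(i, r.nextInt(nOut), 1.0);
        }
        return labels;
    }
}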
File: BNGradientCheckTest.java

@@ -76,7 +76,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
         INDArray input = ds.getFeatures();
         INDArray labels = ds.getLabels();
-        for(boolean useLogStd : new boolean[]{true, false}) {
+        for (boolean useLogStd : new boolean[]{true, false}) {
             MultiLayerConfiguration.Builder builder =
                     new NeuralNetConfiguration.Builder().updater(new NoOp())
@@ -117,14 +117,14 @@ public class BNGradientCheckTest extends BaseDL4JTest {
         int depth = 1;
         int hw = 4;
         int nOut = 4;
-        INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
+        INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw});
         INDArray labels = Nd4j.zeros(minibatch, nOut);
         Random r = new Random(12345);
         for (int i = 0; i < minibatch; i++) {
             labels.putScalar(i, r.nextInt(nOut), 1.0);
         }
-        for(boolean useLogStd : new boolean[]{true, false}) {
+        for (boolean useLogStd : new boolean[]{true, false}) {
             MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                     .dataType(DataType.DOUBLE)
                     .updater(new NoOp()).seed(12345L)
@@ -158,20 +158,14 @@ public class BNGradientCheckTest extends BaseDL4JTest {
     }
     @Test
-    public void testGradientBNWithCNNandSubsamplingcCnfigurableProfiler() {
-        Nd4j.getExecutioner().setProfilingConfig(ProfilerConfig.builder()
-                .notOptimalArguments(true)
-                .notOptimalTAD(true)
-                .checkForINF(true)
-                .checkForNAN(true)
-                .checkElapsedTime(true)
-                .stackTrace(true)
-                .checkWorkspaces(true)
-                .build());
+    public void testGradientBNWithCNNandSubsampling() {
+        //Parameterized test, testing combinations of:
+        // (a) activation function
+        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
+        // (c) Loss function (with specified output activations)
+        // (d) l1 and l2 values
         Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.IDENTITY};
-        boolean[] characteristic = {false, true}; //If true: run some backprop steps first
+        boolean[] characteristic = {true}; //If true: run some backprop steps first
         LossFunctions.LossFunction[] lossFunctions =
                 {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
@@ -181,24 +175,24 @@ public class BNGradientCheckTest extends BaseDL4JTest {
         double[] l1vals = {0.0, 0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
         Nd4j.getRandom().setSeed(12345);
-        int minibatch = 10;
+        int minibatch = 4;
         int depth = 2;
         int hw = 5;
-        int nOut = 3;
-        INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw}).muli(5).subi(2.5);
-        INDArray labels = Nd4j.zeros(minibatch, nOut);
-        Random r = new Random(12345);
-        for (int i = 0; i < minibatch; i++) {
-            labels.putScalar(i, r.nextInt(nOut), 1.0);
-        }
+        int nOut = 2;
+        INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw}).muli(5).subi(2.5);
+        INDArray labels = TestUtils.randomOneHot(minibatch, nOut);
         DataSet ds = new DataSet(input, labels);
+        Random rng = new Random(12345);
-        for(boolean useLogStd : new boolean[]{true, false}) {
+        for (boolean useLogStd : new boolean[]{true, false}) {
             for (Activation afn : activFns) {
                 for (boolean doLearningFirst : characteristic) {
                     for (int i = 0; i < lossFunctions.length; i++) {
                         for (int j = 0; j < l2vals.length; j++) {
+                            //Skip 2 of every 3 tests: from 24 cases to 8, still with decent coverage
+                            if (rng.nextInt(3) != 0)
+                                continue;
                             LossFunctions.LossFunction lf = lossFunctions[i];
                             Activation outputActivation = outputActivations[i];
@@ -260,7 +254,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
                             //However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
                             Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "3_mean", "3_var", "1_log10stdev", "3_log10stdev"));
                             boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
-                                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, excludeParams);
+                                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 25, excludeParams); //Most params are in output layer, only these should be skipped with this threshold
                             assertTrue(gradOK);
                             TestUtils.testModelSerialization(mln);
@@ -269,117 +263,6 @@ public class BNGradientCheckTest extends BaseDL4JTest {
                         }
                     }
                 }
-            OpProfiler.getInstance().printOutDashboard();
-    }
-
-    @Test
-    public void testGradientBNWithCNNandSubsampling() {
-        Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.NAN_PANIC);
-        //Parameterized test, testing combinations of:
-        // (a) activation function
-        // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
-        // (c) Loss function (with specified output activations)
-        // (d) l1 and l2 values
-        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.IDENTITY};
-        boolean[] characteristic = {false, true}; //If true: run some backprop steps first
-        LossFunctions.LossFunction[] lossFunctions =
-                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
-        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
-        double[] l2vals = {0.0, 0.1, 0.1};
-        double[] l1vals = {0.0, 0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
-        Nd4j.getRandom().setSeed(12345);
-        int minibatch = 10;
-        int depth = 2;
-        int hw = 5;
-        int nOut = 3;
-        INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw}).muli(5).subi(2.5);
-        INDArray labels = Nd4j.zeros(minibatch, nOut);
-        Random r = new Random(12345);
-        for (int i = 0; i < minibatch; i++) {
-            labels.putScalar(i, r.nextInt(nOut), 1.0);
-        }
-        DataSet ds = new DataSet(input, labels);
-        for(boolean useLogStd : new boolean[]{true, false}) {
-            for (Activation afn : activFns) {
-                for (boolean doLearningFirst : characteristic) {
-                    for (int i = 0; i < lossFunctions.length; i++) {
-                        for (int j = 0; j < l2vals.length; j++) {
-                            LossFunctions.LossFunction lf = lossFunctions[i];
-                            Activation outputActivation = outputActivations[i];
-                            MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(12345)
-                                    .dataType(DataType.DOUBLE)
-                                    .l2(l2vals[j])
-                                    .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
-                                    .updater(new NoOp())
-                                    .dist(new UniformDistribution(-2, 2)).seed(12345L).list()
-                                    .layer(0, new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
-                                            .activation(afn).build())
-                                    .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).build())
-                                    .layer(2, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
-                                            .kernelSize(2, 2).stride(1, 1).build())
-                                    .layer(3, new BatchNormalization())
-                                    .layer(4, new ActivationLayer.Builder().activation(afn).build())
-                                    .layer(5, new OutputLayer.Builder(lf).activation(outputActivation).nOut(nOut)
-                                            .build())
-                                    .setInputType(InputType.convolutional(hw, hw, depth));
-                            MultiLayerConfiguration conf = builder.build();
-                            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
-                            mln.init();
-                            String name = new Object() {
-                            }.getClass().getEnclosingMethod().getName();
-                            System.out.println("Num params: " + mln.numParams());
-                            if (doLearningFirst) {
-                                //Run a number of iterations of learning
-                                mln.setInput(ds.getFeatures());
-                                mln.setLabels(ds.getLabels());
-                                mln.computeGradientAndScore();
-                                double scoreBefore = mln.score();
-                                for (int k = 0; k < 20; k++)
-                                    mln.fit(ds);
-                                mln.computeGradientAndScore();
-                                double scoreAfter = mln.score();
-                                //Can't test in 'characteristic mode of operation' if not learning
-                                String msg = name
-                                        + " - score did not (sufficiently) decrease during learning - activationFn="
-                                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
-                                        + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
-                                        + ", scoreAfter=" + scoreAfter + ")";
-                                assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
-                            }
-                            if (PRINT_RESULTS) {
-                                System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf
-                                        + ", outputActivation=" + outputActivation + ", doLearningFirst="
-                                        + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
-                                for (int k = 0; k < mln.getnLayers(); k++)
-                                    System.out.println("Layer " + k + " # params: " + mln.getLayer(k).numParams());
-                            }
-                            //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
-                            //i.e., runningMean = decay * runningMean + (1-decay) * batchMean
-                            //However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
-                            Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "3_mean", "3_var", "1_log10stdev", "3_log10stdev"));
-                            boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
-                                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, excludeParams);
-                            assertTrue(gradOK);
-                            TestUtils.testModelSerialization(mln);
-                        }
-                    }
-                }
-            }
-        }
-        OpProfiler.getInstance().printOutDashboard();
     }
@@ -390,21 +273,21 @@ public class BNGradientCheckTest extends BaseDL4JTest {
         // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
         // (c) Loss function (with specified output activations)
         // (d) l1 and l2 values
-        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.IDENTITY};
-        boolean[] characteristic = {false, true}; //If true: run some backprop steps first
+        Activation[] activFns = {Activation.TANH, Activation.IDENTITY};
+        boolean[] characteristic = {true}; //If true: run some backprop steps first
         LossFunctions.LossFunction[] lossFunctions =
                 {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
         Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
-        double[] l2vals = {0.0, 0.1, 0.1};
-        double[] l1vals = {0.0, 0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
+        double[] l2vals = {0.0, 0.1};
+        double[] l1vals = {0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
         Nd4j.getRandom().setSeed(12345);
         int minibatch = 10;
         int nIn = 5;
         int nOut = 3;
-        INDArray input = Nd4j.rand(new int[] {minibatch, nIn});
+        INDArray input = Nd4j.rand(new int[]{minibatch, nIn});
         INDArray labels = Nd4j.zeros(minibatch, nOut);
         Random r = new Random(12345);
         for (int i = 0; i < minibatch; i++) {
@@ -413,7 +296,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
         DataSet ds = new DataSet(input, labels);
-        for(boolean useLogStd : new boolean[]{true, false}) {
+        for (boolean useLogStd : new boolean[]{true, false}) {
             for (Activation afn : activFns) {
                 for (boolean doLearningFirst : characteristic) {
                     for (int i = 0; i < lossFunctions.length; i++) {
@@ -498,7 +381,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
         INDArray input = ds.getFeatures();
         INDArray labels = ds.getLabels();
-        for(boolean useLogStd : new boolean[]{true, false}) {
+        for (boolean useLogStd : new boolean[]{true, false}) {
             MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
                     .dataType(DataType.DOUBLE)
                     .seed(12345L)
@@ -537,14 +420,14 @@ public class BNGradientCheckTest extends BaseDL4JTest {
         int depth = 1;
         int hw = 4;
         int nOut = 4;
-        INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
+        INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw});
         INDArray labels = Nd4j.zeros(minibatch, nOut);
         Random r = new Random(12345);
         for (int i = 0; i < minibatch; i++) {
             labels.putScalar(i, r.nextInt(nOut), 1.0);
         }
-        for(boolean useLogStd : new boolean[]{true, false}) {
+        for (boolean useLogStd : new boolean[]{true, false}) {
             MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
                     .dataType(DataType.DOUBLE)
                     .seed(12345L)
@@ -588,7 +471,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
         int minibatchSize = 3;
-        for(boolean useLogStd : new boolean[]{true, false}) {
+        for (boolean useLogStd : new boolean[]{true, false}) {
             ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).updater(new NoOp())
                     .dataType(DataType.DOUBLE)
@@ -630,22 +513,21 @@ public class BNGradientCheckTest extends BaseDL4JTest {
         // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
        // (c) Loss function (with specified output activations)
        // (d) l1 and l2 values
-        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.IDENTITY};
-        boolean[] characteristic = {false, true}; //If true: run some backprop steps first
-        LossFunctions.LossFunction[] lossFunctions =
-                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
-        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
-        double[] l2vals = {0.0, 0.1, 0.1};
-        double[] l1vals = {0.0, 0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
+        Activation[] activFns = {Activation.TANH, Activation.IDENTITY};
+        boolean doLearningFirst = true;
+        LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD};
+        Activation[] outputActivations = {Activation.SOFTMAX}; //i.e., lossFunctions[i] used with outputActivations[i] here
+        double[] l2vals = {0.0, 0.1};
+        double[] l1vals = {0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
         Nd4j.getRandom().setSeed(12345);
         int minibatch = 10;
         int depth = 2;
         int hw = 5;
         int nOut = 3;
-        INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
+        INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw});
         INDArray labels = Nd4j.zeros(minibatch, nOut);
         Random r = new Random(12345);
         for (int i = 0; i < minibatch; i++) {
@@ -654,75 +536,73 @@ public class BNGradientCheckTest extends BaseDL4JTest {
         DataSet ds = new DataSet(input, labels);
-        for(boolean useLogStd : new boolean[]{true, false}) {
+        for (boolean useLogStd : new boolean[]{true, false}) {
             for (Activation afn : activFns) {
-                for (boolean doLearningFirst : characteristic) {
                 for (int i = 0; i < lossFunctions.length; i++) {
                     for (int j = 0; j < l2vals.length; j++) {
                         LossFunctions.LossFunction lf = lossFunctions[i];
                         Activation outputActivation = outputActivations[i];
                         ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                                 .dataType(DataType.DOUBLE)
                                 .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
                                 .updater(new NoOp())
                                 .dist(new UniformDistribution(-2, 2)).seed(12345L).graphBuilder()
                                 .addInputs("in")
                                 .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
                                         .activation(afn).build(), "in")
                                 .addLayer("1", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "0")
                                 .addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                                         .kernelSize(2, 2).stride(1, 1).build(), "1")
                                 .addLayer("3", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "2")
                                 .addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3")
                                 .addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation)
                                         .nOut(nOut).build(), "4")
                                 .setOutputs("5").setInputTypes(InputType.convolutional(hw, hw, depth))
                                 .build();
                         ComputationGraph net = new ComputationGraph(conf);
                         net.init();
                         String name = new Object() {
                         }.getClass().getEnclosingMethod().getName();
                         if (doLearningFirst) {
                             //Run a number of iterations of learning
                             net.setInput(0, ds.getFeatures());
                             net.setLabels(ds.getLabels());
                             net.computeGradientAndScore();
                             double scoreBefore = net.score();
                             for (int k = 0; k < 20; k++)
                                 net.fit(ds);
                             net.computeGradientAndScore();
                             double scoreAfter = net.score();
                             //Can't test in 'characteristic mode of operation' if not learning
                             String msg = name
                                     + " - score did not (sufficiently) decrease during learning - activationFn="
                                     + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                     + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
                                     + ", scoreAfter=" + scoreAfter + ")";
                             assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                         }
                         if (PRINT_RESULTS) {
                             System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf
                                     + ", outputActivation=" + outputActivation + ", doLearningFirst="
                                     + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
                             for (int k = 0; k < net.getNumLayers(); k++)
                                 System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams());
                         }
                         //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
                         //i.e., runningMean = decay * runningMean + (1-decay) * batchMean
                         //However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
                         Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "3_mean", "3_var", "1_log10stdev", "3_log10stdev"));
                         boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                 DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
                                 new INDArray[]{input}, new INDArray[]{labels}, null, null, excludeParams);
                         assertTrue(gradOK);
                         TestUtils.testModelSerialization(net);
                     }
                 }
-                }
             }
         }
     }

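The rewritten testGradientBNWithCNNandSubsampling above also introduces a seeded random skip (rng.nextInt(3) != 0 then continue) so that only roughly one in three parameter combinations is exercised on any run, cutting the sweep from 24 cases to about 8. Below is a self-contained sketch of that pattern; the loop bounds and the counters are illustrative, not the test's actual parameters.

import java.util.Random;

public class SkipSketch {
    public static void main(String[] args) {
        Random rng = new Random(12345);        // fixed seed: the same subset is chosen every run
        int run = 0, total = 0;
        for (int i = 0; i < 4; i++) {          // e.g. activation functions
            for (int j = 0; j < 6; j++) {      // e.g. loss / regularization combinations
                total++;
                if (rng.nextInt(3) != 0)       // keep roughly 1 case in 3
                    continue;
                run++;                         // the expensive gradient check would run here
            }
        }
        System.out.println(run + " of " + total + " cases executed");
    }
}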
File: CNN3DGradientCheckTest.java

@@ -151,7 +151,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest {
                             boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS,
                                     DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS,
-                                    RETURN_ON_FIRST_FAILURE, input, labels);
+                                    RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 128);
                             assertTrue(msg, gradOK);
@@ -255,7 +255,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest {
                             boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS,
                                     DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS,
-                                    RETURN_ON_FIRST_FAILURE, input, labels);
+                                    RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 512);
                             assertTrue(msg, gradOK);

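The only change in CNN3DGradientCheckTest is the extra trailing arguments to GradientCheckUtil.checkGradients (null, null, true, 128 and true, 512). Based on the call sites elsewhere in this commit, the two nulls appear to be feature and label masks, and the boolean/int pair appears to switch the check to a random subset of parameters with an upper bound on how many are perturbed; treat that reading as an assumption rather than documented API. The sketch below only illustrates what sampling a bounded set of parameter indices amounts to; it is not the GradientCheckUtil implementation, and its names are hypothetical.

import java.util.Random;

public class SubsetCheckSketch {
    // Pick at most maxChecks parameter indices out of nParams to perturb numerically.
    public static int[] chooseIndices(int nParams, int maxChecks, long seed) {
        Random r = new Random(seed);
        int n = Math.min(nParams, maxChecks);
        int[] idx = new int[n];
        for (int i = 0; i < n; i++) {
            idx[i] = r.nextInt(nParams);   // sampling with replacement keeps the sketch simple
        }
        return idx;
    }
}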
File: CNNGradientCheckTest.java

@@ -142,12 +142,6 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
         // (a) activation function
         // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
         // (c) Loss function (with specified output activations)
-        Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
-        boolean[] characteristic = {false, true}; //If true: run some backprop steps first
-        LossFunctions.LossFunction[] lossFunctions =
-                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
-        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
         DataSet ds = new IrisDataSetIterator(150, 150).next();
         ds.normalizeZeroMeanZeroUnitVariance();
@@ -159,72 +153,74 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
         double[] l1vals = {0.0, 0.0, 0.5, 0.0};
         double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
         double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
+        Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
+        boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first
+        LossFunctions.LossFunction[] lossFunctions =
+                {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
+        Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here
-        for (Activation afn : activFns) {
-            for (boolean doLearningFirst : characteristic) {
-                for (int i = 0; i < lossFunctions.length; i++) {
-                    for (int k = 0; k < l2vals.length; k++) {
-                        LossFunctions.LossFunction lf = lossFunctions[i];
-                        Activation outputActivation = outputActivations[i];
-                        double l2 = l2vals[k];
-                        double l1 = l1vals[k];
+        for( int i=0; i<l2vals.length; i++ ){
+            Activation afn = activFns[i];
+            boolean doLearningFirst = characteristic[i];
+            LossFunctions.LossFunction lf = lossFunctions[i];
+            Activation outputActivation = outputActivations[i];
+            double l2 = l2vals[i];
+            double l1 = l1vals[i];
             MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                     .dataType(DataType.DOUBLE)
-                    .l2(l2).l1(l1).l2Bias(biasL2[k]).l1Bias(biasL1[k])
+                    .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
                     .optimizationAlgo(
                             OptimizationAlgorithm.CONJUGATE_GRADIENT)
                     .seed(12345L).list()
                     .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
                             .weightInit(WeightInit.XAVIER).activation(afn)
                             .updater(new NoOp()).build())
                     .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
                             .weightInit(WeightInit.XAVIER).updater(new NoOp()).build())
                     .setInputType(InputType.convolutionalFlat(1, 4, 1));
             MultiLayerConfiguration conf = builder.build();
             MultiLayerNetwork mln = new MultiLayerNetwork(conf);
             mln.init();
             String testName = new Object() {
             }.getClass().getEnclosingMethod().getName();
             if (doLearningFirst) {
                 //Run a number of iterations of learning
                 mln.setInput(ds.getFeatures());
                 mln.setLabels(ds.getLabels());
                 mln.computeGradientAndScore();
                 double scoreBefore = mln.score();
                 for (int j = 0; j < 10; j++)
                     mln.fit(ds);
                 mln.computeGradientAndScore();
                 double scoreAfter = mln.score();
                 //Can't test in 'characteristic mode of operation' if not learning
                 String msg = testName
                         + "- score did not (sufficiently) decrease during learning - activationFn="
                         + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                         + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                         + ", scoreAfter=" + scoreAfter + ")";
                 assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
             }
             if (PRINT_RESULTS) {
                 System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
                         + ", outputActivation=" + outputActivation + ", doLearningFirst="
                         + doLearningFirst);
                 for (int j = 0; j < mln.getnLayers(); j++)
                     System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
             }
             boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                     DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
             assertTrue(gradOK);
             TestUtils.testModelSerialization(mln);
-                    }
-                }
             }
         }
     }
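The hunk above replaces a full Cartesian product of activation function, "characteristic" mode, loss function and regularization value with a single loop over parallel arrays of hand-picked combinations, which is where most of the runtime saving in this file comes from. A generic sketch of that pattern follows; the values and the printout are illustrative, not the test's actual parameters.

public class ZippedCases {
    public static void main(String[] args) {
        String[] activations = {"sigmoid", "tanh", "elu", "softplus"};
        double[] l2 = {0.0, 0.0, 0.5, 0.0};
        double[] l1 = {0.0, 0.1, 0.0, 0.2};
        // One case per index: 4 runs instead of the 4 * 4 * 4 = 64 of a full product
        for (int i = 0; i < activations.length; i++) {
            System.out.println("case " + i + ": " + activations[i] + ", l2=" + l2[i] + ", l1=" + l1[i]);
        }
    }
}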
@@ -369,56 +365,43 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
         int[] padding = {0, 0};
         int size = 2;
-        String[] activations = {"sigmoid", "tanh"};
-        SubsamplingLayer.PoolingType[] poolingTypes =
-                new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX,
-                        SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};
-        for (String afn : activations) {
-            for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
-                for (int minibatchSize : minibatchSizes) {
-                    INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
-                    INDArray labels = Nd4j.zeros(minibatchSize, nOut);
-                    for (int i = 0; i < minibatchSize; i++) {
-                        labels.putScalar(new int[]{i, i % nOut}, 1.0);
-                    }
+        for (int minibatchSize : minibatchSizes) {
+            INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
+            INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
             MultiLayerConfiguration conf =
                     new NeuralNetConfiguration.Builder()
                             .dataType(DataType.DOUBLE)
                             .updater(new NoOp())
                             .dist(new NormalDistribution(0, 1))
                             .list().layer(new ConvolutionLayer.Builder(kernel,
                             stride, padding).nIn(inputDepth)
                             .nOut(3).build())//output: (5-2+0)/1+1 = 4
                             .layer(new Upsampling2D.Builder().size(size).build()) //output: 4*2 =8 -> 8x8x3
                             .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                     .activation(Activation.SOFTMAX).nIn(8 * 8 * 3)
                                     .nOut(4).build())
                             .setInputType(InputType.convolutionalFlat(height, width,
                                     inputDepth))
                             .build();
             MultiLayerNetwork net = new MultiLayerNetwork(conf);
             net.init();
-                    String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
-                            + afn;
+            String msg = "Upsampling - minibatch=" + minibatchSize;
             if (PRINT_RESULTS) {
                 System.out.println(msg);
                 for (int j = 0; j < net.getnLayers(); j++)
                     System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
             }
             boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                     DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
             assertTrue(msg, gradOK);
             TestUtils.testModelSerialization(net);
-                }
-            }
         }
     }
@@ -695,60 +678,56 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
     public void testCnnSamePaddingMode() {
         int nOut = 2;
-        int[] minibatchSizes = {1, 3};
+        int[] minibatchSizes = {1, 3, 3, 2, 1, 2};
+        int[] heights = new int[]{4, 5, 6, 5, 4, 4}; //Same padding mode: insensitive to exact input size...
+        int[] kernelSizes = new int[]{2, 3, 2, 3, 2, 3};
+        int[] inputDepths = {1, 2, 4, 3, 2, 3};
         int width = 5;
-        int[] heights = new int[]{4, 5, 6}; //Same padding mode: insensitive to exact input size...
-        int[] kernelSizes = new int[]{2, 3};
-        int[] inputDepths = {1, 2, 4};
         Nd4j.getRandom().setSeed(12345);
-        for (int inputDepth : inputDepths) {
-            for (int minibatchSize : minibatchSizes) {
-                for (int height : heights) {
-                    for (int k : kernelSizes) {
+        for( int i=0; i<minibatchSizes.length; i++ ){
+            int inputDepth = inputDepths[i];
+            int minibatchSize = minibatchSizes[i];
+            int height = heights[i];
+            int k = kernelSizes[i];
             INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
-            INDArray labels = Nd4j.zeros(minibatchSize, nOut);
-            for (int i = 0; i < minibatchSize; i++) {
-                labels.putScalar(new int[]{i, i % nOut}, 1.0);
-            }
+            INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
             MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                     .dataType(DataType.DOUBLE)
                     .updater(new NoOp())
                     .activation(Activation.TANH).convolutionMode(Same).list()
                     .layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k)
                             .stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build())
                     .layer(1, new SubsamplingLayer.Builder()
                             .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k)
                             .stride(1, 1).padding(0, 0).build())
                     .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k)
                             .stride(1, 1).padding(0, 0).build())
                     .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                             .activation(Activation.SOFTMAX).nOut(nOut).build())
                     .setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build();
             MultiLayerNetwork net = new MultiLayerNetwork(conf);
             net.init();
-            for (int i = 0; i < net.getLayers().length; i++) {
-                System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams());
+            for (int j = 0; j < net.getLayers().length; j++) {
+                System.out.println("nParams, layer " + j + ": " + net.getLayer(j).numParams());
             }
             String msg = "Minibatch=" + minibatchSize + ", inDepth=" + inputDepth + ", height=" + height
                     + ", kernelSize=" + k;
             System.out.println(msg);
             boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                     DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
             assertTrue(msg, gradOK);
             TestUtils.testModelSerialization(net);
-                    }
-                }
-            }
         }
     }
@@ -809,7 +788,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
             boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                     DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
-                    labels);
+                    labels, null, null, true, 128);
             assertTrue(msg, gradOK);
@@ -827,68 +806,66 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
         Nd4j.getRandom().setSeed(12345);
         int nOut = 4;
-        int[] minibatchSizes = {1, 3};
         int width = 6;
         int height = 6;
-        int[] inputDepths = {1, 3};
         int[] kernel = {2, 2};
         int[] stride = {1, 1};
         int[] padding = {0, 0};
+        int[] minibatchSizes = {1, 3, 2};
+        int[] inputDepths = {1, 3, 2};
         int[][] zeroPadLayer = new int[][]{{0, 0, 0, 0}, {1, 1, 0, 0}, {2, 2, 2, 2}};
-        for (int inputDepth : inputDepths) {
-            for (int minibatchSize : minibatchSizes) {
-                INDArray input = Nd4j.rand(new int[]{minibatchSize, inputDepth, height, width});
-                INDArray labels = Nd4j.zeros(minibatchSize, nOut);
-                for (int i = 0; i < minibatchSize; i++) {
-                    labels.putScalar(new int[]{i, i % nOut}, 1.0);
-                }
-                for (int[] zeroPad : zeroPadLayer) {
+        for( int i=0; i<minibatchSizes.length; i++ ){
+            int minibatchSize = minibatchSizes[i];
+            int inputDepth = inputDepths[i];
+            int[] zeroPad = zeroPadLayer[i];
+            INDArray input = Nd4j.rand(DataType.DOUBLE, new int[]{minibatchSize, inputDepth, height, width});
+            INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
             MultiLayerConfiguration conf =
                     new NeuralNetConfiguration.Builder().updater(new NoOp())
                             .dataType(DataType.DOUBLE)
                             .dist(new NormalDistribution(0, 1)).list()
                             .layer(0, new ConvolutionLayer.Builder(kernel, stride, padding)
                                     .nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5
                             .layer(1, new ZeroPaddingLayer.Builder(zeroPad).build()).layer(2,
                             new ConvolutionLayer.Builder(kernel, stride,
                                     padding).nIn(3).nOut(3).build())//output: (6-2+0)/1+1 = 5
                             .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                     .activation(Activation.SOFTMAX).nOut(4).build())
                             .setInputType(InputType.convolutional(height, width, inputDepth))
                             .build();
             MultiLayerNetwork net = new MultiLayerNetwork(conf);
             net.init();
             //Check zero padding activation shape
             org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer zpl =
                     (org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer) net.getLayer(1);
             val expShape = new long[]{minibatchSize, inputDepth, height + zeroPad[0] + zeroPad[1],
                     width + zeroPad[2] + zeroPad[3]};
             INDArray out = zpl.activate(input, false, LayerWorkspaceMgr.noWorkspaces());
             assertArrayEquals(expShape, out.shape());
             String msg = "minibatch=" + minibatchSize + ", channels=" + inputDepth + ", zeroPad = "
+ Arrays.toString(zeroPad); + Arrays.toString(zeroPad);
if (PRINT_RESULTS) { if (PRINT_RESULTS) {
System.out.println(msg); System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++) for (int j = 0; j < net.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
} }
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
} }
} }
@ -896,12 +873,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
public void testDeconvolution2D() { public void testDeconvolution2D() {
int nOut = 2; int nOut = 2;
int[] minibatchSizes = new int[]{1, 3, 1, 3, 1, 3, 1, 3}; int[] minibatchSizes = new int[]{1, 3, 3, 1, 3};
int[] kernelSizes = new int[]{1, 1, 3, 3, 1, 1, 3, 3}; int[] kernelSizes = new int[]{1, 1, 1, 3, 3};
int[] strides = {1, 1, 1, 1, 2, 2, 2, 2}; int[] strides = {1, 1, 2, 2, 2};
int[] dilation = {1, 2, 2, 1, 1, 1, 2, 2}; int[] dilation = {1, 2, 1, 2, 2};
Activation[] activations = new Activation[]{Activation.SIGMOID, Activation.TANH, Activation.TANH, Activation.TANH, Activation.TANH, Activation.SIGMOID, Activation.SIGMOID, Activation.SIGMOID}; Activation[] activations = new Activation[]{Activation.SIGMOID, Activation.TANH, Activation.SIGMOID, Activation.SIGMOID, Activation.SIGMOID};
ConvolutionMode[] cModes = new ConvolutionMode[]{Same, Same, Same, Same, Truncate, Truncate, Truncate, Truncate}; ConvolutionMode[] cModes = new ConvolutionMode[]{Same, Same, Truncate, Truncate, Truncate};
int width = 7; int width = 7;
int height = 7; int height = 7;
int inputDepth = 3; int inputDepth = 3;
@ -954,7 +931,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
System.out.println(msg); System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 100);
assertTrue(msg, gradOK); assertTrue(msg, gradOK);
@ -967,21 +944,17 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int nOut = 2; int nOut = 2;
int[] minibatchSizes = new int[]{1, 3}; int[] minibatchSizes = new int[]{1, 3};
int width = 8; int width = 6;
int height = 8; int height = 6;
int inputDepth = 3; int inputDepth = 3;
int[] kernelSizes = new int[]{2, 3};
int[] strides = {1, 2};
int[] dilation = {1, 2};
ConvolutionMode[] cModes = new ConvolutionMode[]{ConvolutionMode.Truncate};
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
int[] ks = new int[]{1, 3, 1, 3, 1, 3, 1, 3}; int[] ks = new int[]{1, 3, 3, 1, 3};
int[] ss = new int[]{1, 1, 2, 2, 1, 1, 2, 2}; int[] ss = new int[]{1, 1, 1, 2, 2};
int[] ds = new int[]{1, 1, 1, 1, 2, 2, 2, 2}; int[] ds = new int[]{1, 1, 2, 2, 2};
ConvolutionMode[] cms = new ConvolutionMode[]{Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate}; ConvolutionMode[] cms = new ConvolutionMode[]{Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1, 1, 3, 3, 3, 1, 3, 3}; int[] mb = new int[]{1, 1, 1, 3, 3};
for (int t = 0; t < ks.length; t++) { for (int t = 0; t < ks.length; t++) {
@ -1030,7 +1003,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
System.out.println(msg); System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 50); //Most params are in output layer
assertTrue(msg, gradOK); assertTrue(msg, gradOK);
@ -1042,18 +1015,18 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
public void testCnnDilated() { public void testCnnDilated() {
int nOut = 2; int nOut = 2;
int minibatchSize = 3; int minibatchSize = 2;
int width = 8; int width = 8;
int height = 8; int height = 8;
int inputDepth = 3; int inputDepth = 2;
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
boolean[] sub = new boolean[]{true, false, true, false, true, false, true, false}; boolean[] sub = new boolean[]{true, true, false, true, false};
int[] stride = new int[]{1, 1, 2, 2, 1, 1, 2, 2}; int[] stride = new int[]{1, 1, 1, 2, 2};
int[] kernel = new int[]{2, 2, 2, 2, 3, 3, 3, 3}; int[] kernel = new int[]{2, 3, 3, 3, 3};
int[] ds = new int[]{2, 3, 3, 2, 2, 3, 3, 2}; int[] ds = new int[]{2, 2, 3, 3, 2};
ConvolutionMode[] cms = new ConvolutionMode[]{Same, Same, Same, Truncate, Truncate, Truncate, Same, Truncate}; ConvolutionMode[] cms = new ConvolutionMode[]{Same, Truncate, Truncate, Same, Truncate};
for (int t = 0; t < sub.length; t++) { for (int t = 0; t < sub.length; t++) {
@ -1126,7 +1099,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
@Test @Test
public void testCropping2DLayer() { public void testCropping2DLayer() {
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
int nOut = 4; int nOut = 2;
int[] minibatchSizes = {1, 3}; int[] minibatchSizes = {1, 3};
int width = 12; int width = 12;
@ -1155,11 +1128,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.convolutionMode(ConvolutionMode.Same) .convolutionMode(ConvolutionMode.Same)
.weightInit(new NormalDistribution(0, 1)).list() .weightInit(new NormalDistribution(0, 1)).list()
.layer(new ConvolutionLayer.Builder(kernel, stride, padding) .layer(new ConvolutionLayer.Builder(kernel, stride, padding)
.nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5 .nIn(inputDepth).nOut(2).build())//output: (6-2+0)/1+1 = 5
.layer(new Cropping2D(crop)) .layer(new Cropping2D(crop))
.layer(new ConvolutionLayer.Builder(kernel, stride, padding).nIn(3).nOut(3).build()) .layer(new ConvolutionLayer.Builder(kernel, stride, padding).nIn(2).nOut(2).build())
.layer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG).kernelSize(3, 3).stride(3, 3).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(4).build()) .activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(height, width, inputDepth)) .setInputType(InputType.convolutional(height, width, inputDepth))
.build(); .build();
@ -1184,7 +1158,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
} }
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 160);
assertTrue(msg, gradOK); assertTrue(msg, gradOK);
@ -1200,16 +1174,16 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int depthMultiplier = 2; int depthMultiplier = 2;
int nOut = nIn * depthMultiplier; int nOut = nIn * depthMultiplier;
int width = 8; int width = 5;
int height = 8; int height = 5;
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
int[] ks = new int[]{1,3,1,3,1,3,1,3}; int[] ks = new int[]{1,3,3,1,3};
int[] ss = new int[]{1,1,2,2,1,1,2,2}; int[] ss = new int[]{1,1,1,2,2};
ConvolutionMode[] cms = new ConvolutionMode[]{ ConvolutionMode[] cms = new ConvolutionMode[]{
Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate}; Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1,1,3,3,3,1,3,3}; int[] mb = new int[]{1,1,1,3,3};
for( int t=0; t<ks.length; t++ ){ for( int t=0; t<ks.length; t++ ){
@ -1255,7 +1229,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
System.out.println(msg); System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 256);
assertTrue(msg, gradOK); assertTrue(msg, gradOK);
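Many of the hunks above swap hand-rolled one-hot label loops for TestUtils.randomOneHot(minibatch, nOut). As a reading aid, here is a minimal stand-alone sketch of what such a helper does; the class name, seed handling and exact behaviour of the real TestUtils helper are assumptions, not the actual DL4J implementation.

import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import java.util.Random;

public class OneHotLabelSketch {
    // Returns an examples x nOut matrix with exactly one 1.0 per row, in a random column.
    public static INDArray randomOneHot(int examples, int nOut, long seed) {
        Random r = new Random(seed);
        INDArray out = Nd4j.zeros(DataType.DOUBLE, examples, nOut);
        for (int i = 0; i < examples; i++) {
            out.putScalar(i, r.nextInt(nOut), 1.0);
        }
        return out;
    }
}

The time-series variant used above (TestUtils.randomOneHotTimeSeries(mb, nOut, tsLength)) follows the same idea, placing one 1.0 along the class dimension for every (example, time step) pair.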
View File
@ -39,6 +39,8 @@ import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood;
import java.util.Random;
public class CapsnetGradientCheckTest extends BaseDL4JTest { public class CapsnetGradientCheckTest extends BaseDL4JTest {
private static final boolean PRINT_RESULTS = true; private static final boolean PRINT_RESULTS = true;
@ -70,6 +72,7 @@ public class CapsnetGradientCheckTest extends BaseDL4JTest {
for (int capsule : capsules) { for (int capsule : capsules) {
for (int capsuleDim : capsuleDims) { for (int capsuleDim : capsuleDims) {
for (int minibatchSize : minibatchSizes) { for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, inputDepth * height * width).mul(10) INDArray input = Nd4j.rand(minibatchSize, inputDepth * height * width).mul(10)
.reshape(-1, inputDepth, height, width); .reshape(-1, inputDepth, height, width);
INDArray labels = Nd4j.zeros(minibatchSize, capsule); INDArray labels = Nd4j.zeros(minibatchSize, capsule);
@ -110,7 +113,7 @@ public class CapsnetGradientCheckTest extends BaseDL4JTest {
boolean gradOK = GradientCheckUtil boolean gradOK = GradientCheckUtil
.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, .checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
labels); labels, null, null, true, 100);
assertTrue(msg, gradOK); assertTrue(msg, gradOK);
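Several checkGradients calls in these hunks gain trailing arguments such as null, null, true, 100; together with comments like "Most params are in output layer" they appear to cap how many parameters are numerically perturbed. The snippet below only illustrates that general idea (picking a bounded, deterministic subset of parameter indices); it is not GradientCheckUtil's actual signature or internals.

import java.util.LinkedHashSet;
import java.util.Random;
import java.util.Set;

public class SubsetGradCheckSketch {
    // Choose at most maxChecks distinct indices out of nParams, deterministically via the seed.
    public static int[] chooseIndices(int nParams, int maxChecks, long seed) {
        if (nParams <= maxChecks) {
            int[] all = new int[nParams];
            for (int i = 0; i < nParams; i++) all[i] = i;
            return all;
        }
        Random r = new Random(seed);
        Set<Integer> chosen = new LinkedHashSet<>();
        while (chosen.size() < maxChecks) {
            chosen.add(r.nextInt(nParams));
        }
        return chosen.stream().mapToInt(Integer::intValue).toArray();
    }

    public static void main(String[] args) {
        // e.g. a network with 5000 parameters, but only 100 finite-difference perturbations
        int[] idx = chooseIndices(5000, 100, 12345L);
        System.out.println(idx.length + " of 5000 parameters would be perturbed");
    }
}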
View File
@ -100,15 +100,15 @@ public class DropoutGradientCheck extends BaseDL4JTest {
.list(); .list();
if(cnn){ if(cnn){
builder.layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(1,1).nOut(3).build()); builder.layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(2,2).nOut(2).build());
builder.layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(1,1).nOut(3).build()); builder.layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(2,2).nOut(2).build());
builder.setInputType(InputType.convolutional(8,8,3)); builder.setInputType(InputType.convolutional(6,6,2));
} else { } else {
builder.layer(new DenseLayer.Builder().nOut(12).build()); builder.layer(new DenseLayer.Builder().nOut(3).build());
builder.layer(new DenseLayer.Builder().nOut(12).build()); builder.layer(new DenseLayer.Builder().nOut(3).build());
builder.setInputType(InputType.feedForward(8)); builder.setInputType(InputType.feedForward(6));
} }
builder.layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunction.MCXENT).build()); builder.layer(new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunction.MCXENT).build());
MultiLayerConfiguration conf = builder.build(); MultiLayerConfiguration conf = builder.build();
//Remove spatial dropout from output layer - can't be used for 2d input //Remove spatial dropout from output layer - can't be used for 2d input
@ -123,11 +123,11 @@ public class DropoutGradientCheck extends BaseDL4JTest {
INDArray f; INDArray f;
if(cnn){ if(cnn){
f = Nd4j.rand(new int[]{minibatch, 3, 8, 8}).muli(10).subi(5); f = Nd4j.rand(new int[]{minibatch, 2, 6, 6}).muli(10).subi(5);
} else { } else {
f = Nd4j.rand(minibatch, 8).muli(10).subi(5); f = Nd4j.rand(minibatch, 6).muli(10).subi(5);
} }
INDArray l = TestUtils.randomOneHot(minibatch, 10); INDArray l = TestUtils.randomOneHot(minibatch, 3);
log.info("*** Starting test: " + msg + " ***"); log.info("*** Starting test: " + msg + " ***");
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
View File
@ -24,6 +24,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test; import org.junit.Test;
import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.Activation;
@ -53,11 +54,11 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
private static final double DEFAULT_MIN_ABS_ERROR = 1e-8; private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
@Test @Test
public void testLSTMGlobalPoolingBasicMultiLayer() { public void testRNNGlobalPoolingBasicMultiLayer() {
//Basic test of global pooling w/ LSTM //Basic test of global pooling w/ LSTM
Nd4j.getRandom().setSeed(12345L); Nd4j.getRandom().setSeed(12345L);
int timeSeriesLength = 10; int timeSeriesLength = 5;
int nIn = 5; int nIn = 5;
int layerSize = 4; int layerSize = 4;
int nOut = 2; int nOut = 2;
@ -73,7 +74,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
.dataType(DataType.DOUBLE) .dataType(DataType.DOUBLE)
.updater(new NoOp()) .updater(new NoOp())
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list() .dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .layer(0, new SimpleRnn.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.build()) .build())
.layer(1, new GlobalPoolingLayer.Builder().poolingType(pt).build()) .layer(1, new GlobalPoolingLayer.Builder().poolingType(pt).build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
@ -84,20 +85,9 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
mln.init(); mln.init();
Random r = new Random(12345L); Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength); INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize, nIn, timeSeriesLength).subi(0.5);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] {i, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray labels = Nd4j.zeros(miniBatchSize, nOut); INDArray labels = TestUtils.randomOneHot(miniBatchSize, nOut).castTo(DataType.DOUBLE);
for (int i = 0; i < miniBatchSize; i++) {
int idx = r.nextInt(nOut);
labels.putScalar(i, idx, 1.0);
}
if (PRINT_RESULTS) { if (PRINT_RESULTS) {
System.out.println("testLSTMGlobalPoolingBasicMultiLayer() - " + pt + ", minibatch = " System.out.println("testLSTMGlobalPoolingBasicMultiLayer() - " + pt + ", minibatch = "
@ -175,12 +165,12 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
@Test @Test
public void testLSTMWithMasking() { public void testLSTMWithMasking() {
//Basic test of GravesLSTM layer //Basic test of LSTM layer
Nd4j.getRandom().setSeed(12345L); Nd4j.getRandom().setSeed(12345L);
int timeSeriesLength = 10; int timeSeriesLength = 5;
int nIn = 5; int nIn = 4;
int layerSize = 4; int layerSize = 3;
int nOut = 2; int nOut = 2;
int miniBatchSize = 3; int miniBatchSize = 3;
@ -193,7 +183,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
.dataType(DataType.DOUBLE) .dataType(DataType.DOUBLE)
.updater(new NoOp()) .updater(new NoOp())
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list() .dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .layer(0, new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.build()) .build())
.layer(1, new GlobalPoolingLayer.Builder().poolingType(pt).build()) .layer(1, new GlobalPoolingLayer.Builder().poolingType(pt).build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
@ -204,14 +194,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
mln.init(); mln.init();
Random r = new Random(12345L); Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength); INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize, nIn, timeSeriesLength).subi(0.5);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] {i, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray featuresMask = Nd4j.create(miniBatchSize, timeSeriesLength); INDArray featuresMask = Nd4j.create(miniBatchSize, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) { for (int i = 0; i < miniBatchSize; i++) {
@ -221,12 +204,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
} }
} }
INDArray labels = Nd4j.zeros(miniBatchSize, nOut); INDArray labels = TestUtils.randomOneHot(miniBatchSize, nOut);
for (int i = 0; i < miniBatchSize; i++) {
int idx = r.nextInt(nOut);
labels.putScalar(i, idx, 1.0);
}
mln.setLayerMaskArrays(featuresMask, null); mln.setLayerMaskArrays(featuresMask, null);
if (PRINT_RESULTS) { if (PRINT_RESULTS) {
View File
@ -32,6 +32,7 @@ import org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex;
import org.deeplearning4j.nn.conf.graph.rnn.ReverseTimeSeriesVertex; import org.deeplearning4j.nn.conf.graph.rnn.ReverseTimeSeriesVertex;
import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor;
@ -334,7 +335,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
} }
@Test @Test
public void testLSTMWithMerging() { public void testRNNWithMerging() {
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = ComputationGraphConfiguration conf =
@ -345,23 +346,23 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
.updater(new NoOp()).graphBuilder().addInputs("input") .updater(new NoOp()).graphBuilder().addInputs("input")
.setOutputs("out") .setOutputs("out")
.addLayer("lstm1", .addLayer("lstm1",
new GravesLSTM.Builder().nIn(3).nOut(4) new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(), .activation(Activation.TANH).build(),
"input") "input")
.addLayer("lstm2", .addLayer("lstm2",
new GravesLSTM.Builder().nIn(4).nOut(4) new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(), .activation(Activation.TANH).build(),
"lstm1") "lstm1")
.addLayer("dense1", .addLayer("dense1",
new DenseLayer.Builder().nIn(4).nOut(4) new DenseLayer.Builder().nIn(3).nOut(3)
.activation(Activation.SIGMOID).build(), .activation(Activation.SIGMOID).build(),
"lstm1") "lstm1")
.addLayer("lstm3", .addLayer("lstm3",
new GravesLSTM.Builder().nIn(4).nOut(4) new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(), .activation(Activation.TANH).build(),
"dense1") "dense1")
.addVertex("merge", new MergeVertex(), "lstm2", "lstm3") .addVertex("merge", new MergeVertex(), "lstm2", "lstm3")
.addLayer("out", new RnnOutputLayer.Builder().nIn(8).nOut(3) .addLayer("out", new RnnOutputLayer.Builder().nIn(6).nOut(3)
.activation(Activation.SOFTMAX) .activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), .lossFunction(LossFunctions.LossFunction.MCXENT).build(),
"merge") "merge")
@ -373,13 +374,8 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
graph.init(); graph.init();
Random r = new Random(12345); Random r = new Random(12345);
INDArray input = Nd4j.rand(new int[] {3, 3, 5}); INDArray input = Nd4j.rand(new int[] {2, 3, 4});
INDArray labels = Nd4j.zeros(3, 3, 5); INDArray labels = TestUtils.randomOneHotTimeSeries(2, 3, 4);
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 5; j++) {
labels.putScalar(new int[] {i, r.nextInt(3), j}, 1.0);
}
}
if (PRINT_RESULTS) { if (PRINT_RESULTS) {
System.out.println("testLSTMWithMerging()"); System.out.println("testLSTMWithMerging()");
@ -401,13 +397,12 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
Nd4j.getRandom().setSeed(1234); Nd4j.getRandom().setSeed(1234);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(1234) ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(1234)
.dataType(DataType.DOUBLE) .dataType(DataType.DOUBLE)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(new NormalDistribution(0, 1))
.dist(new NormalDistribution(0, 1))
.updater(new NoOp()).graphBuilder().addInputs("input").setOutputs("out") .updater(new NoOp()).graphBuilder().addInputs("input").setOutputs("out")
.addLayer("lstm1", new GravesLSTM.Builder().nIn(3).nOut(8).activation(Activation.TANH).build(), .addLayer("lstm1", new LSTM.Builder().nIn(3).nOut(6).activation(Activation.TANH).build(),
"input") "input")
.addVertex("subset", new SubsetVertex(0, 3), "lstm1") .addVertex("subset", new SubsetVertex(0, 2), "lstm1")
.addLayer("out", new RnnOutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .addLayer("out", new RnnOutputLayer.Builder().nIn(3).nOut(2).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "subset") .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "subset")
.build(); .build();
@ -415,13 +410,8 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
graph.init(); graph.init();
Random r = new Random(12345); Random r = new Random(12345);
INDArray input = Nd4j.rand(new int[] {3, 3, 5}); INDArray input = Nd4j.rand(new int[] {2, 3, 4});
INDArray labels = Nd4j.zeros(3, 3, 5); INDArray labels = TestUtils.randomOneHotTimeSeries(2, 2, 4);
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 5; j++) {
labels.putScalar(new int[] {i, r.nextInt(3), j}, 1.0);
}
}
if (PRINT_RESULTS) { if (PRINT_RESULTS) {
System.out.println("testLSTMWithSubset()"); System.out.println("testLSTMWithSubset()");
@ -447,10 +437,10 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.dist(new NormalDistribution(0, 1)) .dist(new NormalDistribution(0, 1))
.updater(new NoOp()).graphBuilder().addInputs("input").setOutputs("out") .updater(new NoOp()).graphBuilder().addInputs("input").setOutputs("out")
.addLayer("lstm1", new GravesLSTM.Builder().nIn(3).nOut(4).activation(Activation.TANH).build(), .addLayer("lstm1", new LSTM.Builder().nIn(3).nOut(4).activation(Activation.TANH).build(),
"input") "input")
.addVertex("lastTS", new LastTimeStepVertex("input"), "lstm1") .addVertex("lastTS", new LastTimeStepVertex("input"), "lstm1")
.addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(2).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "lastTS") .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "lastTS")
.build(); .build();
@ -458,11 +448,8 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
graph.init(); graph.init();
Random r = new Random(12345); Random r = new Random(12345);
INDArray input = Nd4j.rand(new int[] {3, 3, 5}); INDArray input = Nd4j.rand(new int[] {2, 3, 4});
INDArray labels = Nd4j.zeros(3, 3); //Here: labels are 2d (due to LastTimeStepVertex) INDArray labels = TestUtils.randomOneHot(2, 2); //Here: labels are 2d (due to LastTimeStepVertex)
for (int i = 0; i < 3; i++) {
labels.putScalar(new int[] {i, r.nextInt(3)}, 1.0);
}
if (PRINT_RESULTS) { if (PRINT_RESULTS) {
System.out.println("testLSTMWithLastTimeStepVertex()"); System.out.println("testLSTMWithLastTimeStepVertex()");
@ -503,16 +490,16 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
.updater(new NoOp()).graphBuilder() .updater(new NoOp()).graphBuilder()
.addInputs("input1", "input2").setOutputs("out") .addInputs("input1", "input2").setOutputs("out")
.addLayer("lstm1", .addLayer("lstm1",
new GravesLSTM.Builder().nIn(3).nOut(4) new LSTM.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(), .activation(Activation.TANH).build(),
"input1") "input1")
.addLayer("lstm2", .addLayer("lstm2",
new GravesLSTM.Builder().nIn(4).nOut(5) new LSTM.Builder().nIn(2).nOut(4)
.activation(Activation.SOFTSIGN).build(), .activation(Activation.SOFTSIGN).build(),
"input2") "input2")
.addVertex("lastTS", new LastTimeStepVertex("input2"), "lstm2") .addVertex("lastTS", new LastTimeStepVertex("input2"), "lstm2")
.addVertex("duplicate", new DuplicateToTimeSeriesVertex("input2"), "lastTS") .addVertex("duplicate", new DuplicateToTimeSeriesVertex("input2"), "lastTS")
.addLayer("out", new RnnOutputLayer.Builder().nIn(5 + 4).nOut(3) .addLayer("out", new RnnOutputLayer.Builder().nIn(3+4).nOut(2)
.activation(Activation.SOFTMAX) .activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), .lossFunction(LossFunctions.LossFunction.MCXENT).build(),
"lstm1", "duplicate") "lstm1", "duplicate")
@ -522,14 +509,9 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
graph.init(); graph.init();
Random r = new Random(12345); Random r = new Random(12345);
INDArray input1 = Nd4j.rand(new int[] {3, 3, 5}); INDArray input1 = Nd4j.rand(new int[] {2, 3, 4});
INDArray input2 = Nd4j.rand(new int[] {3, 4, 5}); INDArray input2 = Nd4j.rand(new int[] {2, 2, 4});
INDArray labels = Nd4j.zeros(3, 3, 5); INDArray labels = TestUtils.randomOneHotTimeSeries(2, 2, 4);
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 5; j++) {
labels.putScalar(new int[] {i, r.nextInt(3), j}, 1.0);
}
}
if (PRINT_RESULTS) { if (PRINT_RESULTS) {
System.out.println("testLSTMWithDuplicateToTimeSeries()"); System.out.println("testLSTMWithDuplicateToTimeSeries()");
@ -558,16 +540,16 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
.updater(new NoOp()).graphBuilder() .updater(new NoOp()).graphBuilder()
.addInputs("input").setOutputs("out") .addInputs("input").setOutputs("out")
.addLayer("lstm_a", .addLayer("lstm_a",
new GravesLSTM.Builder().nIn(3).nOut(4) new LSTM.Builder().nIn(2).nOut(3)
.activation(Activation.TANH).build(), .activation(Activation.TANH).build(),
"input") "input")
.addVertex("input_rev", new ReverseTimeSeriesVertex("input"), "input") .addVertex("input_rev", new ReverseTimeSeriesVertex("input"), "input")
.addLayer("lstm_b", .addLayer("lstm_b",
new GravesLSTM.Builder().nIn(3).nOut(4) new LSTM.Builder().nIn(2).nOut(3)
.activation(Activation.TANH).build(), .activation(Activation.TANH).build(),
"input_rev") "input_rev")
.addVertex("lstm_b_rev", new ReverseTimeSeriesVertex("input"), "lstm_b") .addVertex("lstm_b_rev", new ReverseTimeSeriesVertex("input"), "lstm_b")
.addLayer("out", new RnnOutputLayer.Builder().nIn(4 + 4).nOut(3) .addLayer("out", new RnnOutputLayer.Builder().nIn(3 + 3).nOut(2)
.activation(Activation.SOFTMAX) .activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), .lossFunction(LossFunctions.LossFunction.MCXENT).build(),
"lstm_a", "lstm_b_rev") "lstm_a", "lstm_b_rev")
@ -577,13 +559,8 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
graph.init(); graph.init();
Random r = new Random(12345); Random r = new Random(12345);
INDArray input = Nd4j.rand(new int[] {3, 3, 5}); INDArray input = Nd4j.rand(new int[] {2, 2, 4});
INDArray labels = Nd4j.zeros(3, 3, 5); INDArray labels = TestUtils.randomOneHotTimeSeries(2, 2, 4);
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 5; j++) {
labels.putScalar(new int[] {i, r.nextInt(3), j}, 1.0);
}
}
if (PRINT_RESULTS) { if (PRINT_RESULTS) {
System.out.println("testLSTMWithReverseTimeSeriesVertex()"); System.out.println("testLSTMWithReverseTimeSeriesVertex()");
@ -1171,10 +1148,10 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
.dist(new NormalDistribution(0, 1)) .dist(new NormalDistribution(0, 1))
.activation(Activation.TANH).updater(new NoOp()).graphBuilder() .activation(Activation.TANH).updater(new NoOp()).graphBuilder()
.addInputs("in1", "in2") .addInputs("in1", "in2")
.addLayer("d0", new GravesLSTM.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in1") .addLayer("d0", new SimpleRnn.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in1")
.addLayer("d1", new GravesLSTM.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in2") .addLayer("d1", new SimpleRnn.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in2")
.addVertex("stack", new StackVertex(), "d0", "d1") .addVertex("stack", new StackVertex(), "d0", "d1")
.addLayer("d2", new GravesLSTM.Builder().nIn(layerSizes).nOut(layerSizes).build(), "stack") .addLayer("d2", new SimpleRnn.Builder().nIn(layerSizes).nOut(layerSizes).build(), "stack")
.addVertex("u1", new UnstackVertex(0, 2), "d2").addVertex("u2", new UnstackVertex(1, 2), "d2") .addVertex("u1", new UnstackVertex(0, 2), "d2").addVertex("u2", new UnstackVertex(1, 2), "d2")
.addLayer("p1", new GlobalPoolingLayer.Builder(PoolingType.AVG).build(), "u1") .addLayer("p1", new GlobalPoolingLayer.Builder(PoolingType.AVG).build(), "u1")
.addLayer("p2", new GlobalPoolingLayer.Builder(PoolingType.AVG).build(), "u2") .addLayer("p2", new GlobalPoolingLayer.Builder(PoolingType.AVG).build(), "u2")
@ -1193,7 +1170,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
INDArray newParams = Nd4j.rand(new long[]{1, nParams}); INDArray newParams = Nd4j.rand(new long[]{1, nParams});
graph.setParams(newParams); graph.setParams(newParams);
int[] mbSizes = new int[] {1, 3, 10}; int[] mbSizes = new int[] {1, 2, 3};
for (int minibatch : mbSizes) { for (int minibatch : mbSizes) {
INDArray in1 = Nd4j.rand(new int[] {minibatch, layerSizes, 4}); INDArray in1 = Nd4j.rand(new int[] {minibatch, layerSizes, 4});
View File
@ -25,6 +25,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test; import org.junit.Test;
@ -85,7 +86,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
mask[3] = new boolean[] {false, false, true, false, true}; //time series classification w/ variable length TS mask[3] = new boolean[] {false, false, true, false, true}; //time series classification w/ variable length TS
mask[4] = new boolean[] {true, true, true, false, true}; //variable length TS mask[4] = new boolean[] {true, true, true, false, true}; //variable length TS
int nIn = 4; int nIn = 3;
int layerSize = 3; int layerSize = 3;
GradientCheckSimpleScenario[] scenarios = new GradientCheckSimpleScenario[] { GradientCheckSimpleScenario[] scenarios = new GradientCheckSimpleScenario[] {
@ -94,23 +95,14 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(), new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),
Activation.TANH, 10, 3), Activation.TANH, 10, 3),
new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(4).build(), new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(4).build(),
Activation.IDENTITY, 12, 4), Activation.IDENTITY, 12, 4)};
new GradientCheckSimpleScenario(LossFunctions.LossFunction.L2.getILossFunction(),
Activation.SOFTMAX, 2, 2)};
for (GradientCheckSimpleScenario s : scenarios) { for (GradientCheckSimpleScenario s : scenarios) {
Random r = new Random(12345L); Random r = new Random(12345L);
INDArray input = Nd4j.zeros(1, nIn, timeSeriesLength); INDArray input = Nd4j.rand(DataType.DOUBLE, 1, nIn, timeSeriesLength).subi(0.5);
for (int m = 0; m < 1; m++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] {m, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray labels = Nd4j.zeros(1, s.labelWidth, timeSeriesLength); INDArray labels = Nd4j.zeros(DataType.DOUBLE, 1, s.labelWidth, timeSeriesLength);
for (int m = 0; m < 1; m++) { for (int m = 0; m < 1; m++) {
for (int j = 0; j < timeSeriesLength; j++) { for (int j = 0; j < timeSeriesLength; j++) {
int idx = r.nextInt(s.labelWidth); int idx = r.nextInt(s.labelWidth);
@ -127,15 +119,14 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
} }
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L) MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L)
.dataType(DataType.DOUBLE) .dataType(DataType.DOUBLE)
.list() .updater(new NoOp())
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) .list()
.dist(new NormalDistribution(0, 1)) .layer(0, new SimpleRnn.Builder().nIn(nIn).nOut(layerSize)
.updater(new NoOp()).build()) .weightInit(new NormalDistribution(0, 1)).build())
.layer(1, new RnnOutputLayer.Builder(s.lf).activation(s.act).nIn(layerSize).nOut(s.nOut) .layer(1, new RnnOutputLayer.Builder(s.lf).activation(s.act).nIn(layerSize).nOut(s.nOut)
.dist(new NormalDistribution(0, 1)) .weightInit(new NormalDistribution(0, 1)).build())
.updater(new NoOp()).build()) .build();
.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf); MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init(); mln.init();
@ -156,15 +147,14 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
int timeSeriesLength = 5; int timeSeriesLength = 5;
int nIn = 5; int nIn = 5;
int layerSize = 4; int layerSize = 3;
int nOut = 3; int nOut = 3;
int miniBatchSize = 3; int miniBatchSize = 2;
INDArray[] masks = new INDArray[] {null, INDArray[] masks = new INDArray[] {
Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}), Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {1, 1, 1, 0, 0}}),
Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 0}, {1, 1, 1, 0, 0}}), Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {0, 1, 1, 1, 1}})};
Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {0, 1, 1, 1, 1}, {0, 0, 1, 1, 1}})};
int testNum = 0; int testNum = 0;
for (INDArray mask : masks) { for (INDArray mask : masks) {
@ -201,7 +191,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
} }
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, mask, mask); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, mask, mask, true, 16);
assertTrue(gradOK); assertTrue(gradOK);
TestUtils.testModelSerialization(mln); TestUtils.testModelSerialization(mln);
@ -295,9 +285,9 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
//For RNNs: per-output masking uses 3d masks (same shape as output/labels), as compared to the standard //For RNNs: per-output masking uses 3d masks (same shape as output/labels), as compared to the standard
// 2d masks (used for per *example* masking) // 2d masks (used for per *example* masking)
int nIn = 4; int nIn = 3;
int layerSize = 4; int layerSize = 3;
int nOut = 4; int nOut = 2;
//1 example, TS length 3 //1 example, TS length 3
INDArray mask1 = Nd4j.create(new double[] {1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0}, new int[] {1, nOut, 3}, 'f'); INDArray mask1 = Nd4j.create(new double[] {1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0}, new int[] {1, nOut, 3}, 'f');
@ -358,7 +348,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
.dataType(DataType.DOUBLE) .dataType(DataType.DOUBLE)
.dist(new NormalDistribution(0, 1)).seed(12345) .dist(new NormalDistribution(0, 1)).seed(12345)
.list() .list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .layer(0, new SimpleRnn.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.build()) .build())
.layer(1, new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf) .layer(1, new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf)
.activation(a).build()) .activation(a).build())
@ -390,7 +380,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
.dataType(DataType.DOUBLE) .dataType(DataType.DOUBLE)
.dist(new NormalDistribution(0, 2)).seed(12345) .dist(new NormalDistribution(0, 2)).seed(12345)
.graphBuilder().addInputs("in") .graphBuilder().addInputs("in")
.addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) .addLayer("0", new SimpleRnn.Builder().nIn(nIn).nOut(layerSize)
.activation(Activation.TANH).build(), "in") .activation(Activation.TANH).build(), "in")
.addLayer("1", new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf) .addLayer("1", new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf)
.activation(a).build(), "0") .activation(a).build(), "0")
View File
@ -139,11 +139,11 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
@Test @Test
public void testGradientLSTMFull() { public void testGradientLSTMFull() {
int timeSeriesLength = 8; int timeSeriesLength = 4;
int nIn = 7; int nIn = 3;
int layerSize = 9; int layerSize = 4;
int nOut = 4; int nOut = 2;
int miniBatchSize = 6; int miniBatchSize = 2;
boolean[] gravesLSTM = new boolean[] {true, false}; boolean[] gravesLSTM = new boolean[] {true, false};
@ -162,13 +162,13 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
//use l2vals[i] with l1vals[i] //use l2vals[i] with l1vals[i]
double[] l2vals = {0.4, 0.0, 0.4, 0.4}; double[] l2vals = {0.4, 0.0};
double[] l1vals = {0.0, 0.0, 0.5, 0.0}; double[] l1vals = {0.0, 0.5};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; double[] biasL2 = {0.3, 0.0};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; double[] biasL1 = {0.0, 0.6};
Activation[] activFns = {Activation.TANH, Activation.SOFTSIGN, Activation.TANH, Activation.TANH}; Activation[] activFns = {Activation.TANH, Activation.SOFTSIGN};
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE, LossFunction.MSE, LossFunction.MCXENT}; LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.IDENTITY, Activation.SOFTMAX}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH};
for (int i = 0; i < l2vals.length; i++) { for (int i = 0; i < l2vals.length; i++) {
@ -218,7 +218,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
} }
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 128);
assertTrue(testName, gradOK); assertTrue(testName, gradOK);
TestUtils.testModelSerialization(mln); TestUtils.testModelSerialization(mln);
@ -233,9 +233,9 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
int[] timeSeriesLength = {1, 5, 1}; int[] timeSeriesLength = {1, 5, 1};
int[] miniBatchSize = {7, 1, 1}; int[] miniBatchSize = {7, 1, 1};
int nIn = 7; int nIn = 3;
int layerSize = 9; int layerSize = 4;
int nOut = 4; int nOut = 2;
boolean[] gravesLSTM = new boolean[] {true, false}; boolean[] gravesLSTM = new boolean[] {true, false};
@ -244,22 +244,9 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
for (int i = 0; i < timeSeriesLength.length; i++) { for (int i = 0; i < timeSeriesLength.length; i++) {
Random r = new Random(12345L); Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize[i], nIn, timeSeriesLength[i]); INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize[i], nIn, timeSeriesLength[i]);
for (int m = 0; m < miniBatchSize[i]; m++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength[i]; k++) {
input.putScalar(new int[] {m, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray labels = Nd4j.zeros(miniBatchSize[i], nOut, timeSeriesLength[i]); INDArray labels = TestUtils.randomOneHotTimeSeries(miniBatchSize[i], nOut, timeSeriesLength[i]);
for (int m = 0; m < miniBatchSize[i]; m++) {
for (int j = 0; j < timeSeriesLength[i]; j++) {
int idx = r.nextInt(nOut);
labels.putScalar(new int[] {m, idx, j}, 1.0f);
}
}
Layer layer; Layer layer;
if (graves) { if (graves) {
@ -296,91 +283,75 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE}; LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
int timeSeriesLength = 4; int timeSeriesLength = 3;
int nIn = 2; int nIn = 2;
int layerSize = 2; int layerSize = 2;
int nOut = 2; int nOut = 2;
int miniBatchSize = 3; int miniBatchSize = 3;
Random r = new Random(12345L); Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength); INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize, nIn, timeSeriesLength).subi(0.5);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] {i, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < timeSeriesLength; j++) {
int idx = r.nextInt(nOut);
labels.putScalar(new int[] {i, idx, j}, 1.0f);
}
}
INDArray labels = TestUtils.randomOneHotTimeSeries(miniBatchSize, nOut, timeSeriesLength);
//use l2vals[i] with l1vals[i] //use l2vals[i] with l1vals[i]
double[] l2vals = {0.4, 0.0, 0.4, 0.4}; double[] l2vals = {0.4, 0.0};
double[] l1vals = {0.0, 0.0, 0.5, 0.0}; double[] l1vals = {0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; double[] biasL2 = {0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; double[] biasL1 = {0.0, 0.6};
for (Activation afn : activFns) { for (int i = 0; i < lossFunctions.length; i++) {
for (int i = 0; i < lossFunctions.length; i++) { for (int k = 0; k < l2vals.length; k++) {
for (int k = 0; k < l2vals.length; k++) { Activation afn = activFns[i];
LossFunction lf = lossFunctions[i]; LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i]; Activation outputActivation = outputActivations[i];
double l2 = l2vals[k]; double l2 = l2vals[k];
double l1 = l1vals[k]; double l1 = l1vals[k];
NeuralNetConfiguration.Builder conf = NeuralNetConfiguration.Builder conf =
new NeuralNetConfiguration.Builder(); new NeuralNetConfiguration.Builder();
if (l1 > 0.0) if (l1 > 0.0)
conf.l1(l1); conf.l1(l1);
if (l2 > 0.0) if (l2 > 0.0)
conf.l2(l2); conf.l2(l2);
if (biasL2[k] > 0) if (biasL2[k] > 0)
conf.l2Bias(biasL2[k]); conf.l2Bias(biasL2[k]);
if (biasL1[k] > 0) if (biasL1[k] > 0)
conf.l1Bias(biasL1[k]); conf.l1Bias(biasL1[k]);
MultiLayerConfiguration mlc = conf.seed(12345L) MultiLayerConfiguration mlc = conf.seed(12345L)
.dataType(DataType.DOUBLE) .dataType(DataType.DOUBLE)
.list().layer(0, .updater(new NoOp())
new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize) .list().layer(0,
new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
.dist(new NormalDistribution(0, 1)) .weightInit(new NormalDistribution(0, 1))
.activation(afn).updater( .activation(afn)
Updater.NONE) .build())
.build()) .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize)
.layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize) .nOut(nOut)
.nOut(nOut) .dist(new NormalDistribution(0, 1)).updater(new NoOp()).build())
.dist(new NormalDistribution(0, 1)).updater(new NoOp()).build()) .build();
.build();
MultiLayerNetwork mln = new MultiLayerNetwork(mlc); MultiLayerNetwork mln = new MultiLayerNetwork(mlc);
mln.init(); mln.init();
if (PRINT_RESULTS) { if (PRINT_RESULTS) {
System.out.println("testGradientGravesBidirectionalLSTMFull() - activationFn=" + afn System.out.println("testGradientGravesBidirectionalLSTMFull() - activationFn=" + afn
+ ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2
+ ", l1=" + l1); + ", l1=" + l1);
for (int j = 0; j < mln.getnLayers(); j++) for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
String msg = "testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(mln);
} }
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
String msg = "testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(mln);
} }
} }
} }
@ -391,21 +362,14 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
int[] timeSeriesLength = {1, 5, 1}; int[] timeSeriesLength = {1, 5, 1};
int[] miniBatchSize = {7, 1, 1}; int[] miniBatchSize = {7, 1, 1};
int nIn = 7; int nIn = 3;
int layerSize = 9; int layerSize = 4;
int nOut = 4; int nOut = 2;
for (int i = 0; i < timeSeriesLength.length; i++) { for (int i = 0; i < timeSeriesLength.length; i++) {
Random r = new Random(12345L); Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize[i], nIn, timeSeriesLength[i]); INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize[i], nIn, timeSeriesLength[i]).subi(0.5);
for (int m = 0; m < miniBatchSize[i]; m++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength[i]; k++) {
input.putScalar(new int[] {m, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray labels = Nd4j.zeros(miniBatchSize[i], nOut, timeSeriesLength[i]); INDArray labels = Nd4j.zeros(miniBatchSize[i], nOut, timeSeriesLength[i]);
for (int m = 0; m < miniBatchSize[i]; m++) { for (int m = 0; m < miniBatchSize[i]; m++) {
@ -431,7 +395,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
mln.init(); mln.init();
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 128);
String msg = "testGradientGravesLSTMEdgeCases() - timeSeriesLength=" + timeSeriesLength[i] String msg = "testGradientGravesLSTMEdgeCases() - timeSeriesLength=" + timeSeriesLength[i]
+ ", miniBatchSize=" + miniBatchSize[i]; + ", miniBatchSize=" + miniBatchSize[i];
@ -445,11 +409,11 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
//Test gradients with CNN -> FF -> LSTM -> RnnOutputLayer //Test gradients with CNN -> FF -> LSTM -> RnnOutputLayer
//time series input/output (i.e., video classification or similar) //time series input/output (i.e., video classification or similar)
int nChannelsIn = 3; int nChannelsIn = 2;
int inputSize = 10 * 10 * nChannelsIn; //10px x 10px x 3 channels int inputSize = 6 * 6 * nChannelsIn; //6px x 6px x 2 channels
int miniBatchSize = 4; int miniBatchSize = 2;
int timeSeriesLength = 10; int timeSeriesLength = 4;
int nClasses = 3; int nClasses = 2;
//Generate //Generate
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
@ -467,18 +431,18 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()).seed(12345) MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()).seed(12345)
.dataType(DataType.DOUBLE) .dataType(DataType.DOUBLE)
.dist(new UniformDistribution(-2, 2)).list() .dist(new UniformDistribution(-2, 2)).list()
.layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).nOut(5).stride(1, 1) .layer(0, new ConvolutionLayer.Builder(3, 3).nIn(2).nOut(3).stride(1, 1)
.activation(Activation.TANH).build()) //Out: (10-5)/1+1 = 6 -> 6x6x5 .activation(Activation.TANH).build()) //Out: (6-3)/1+1 = 4 -> 4x4x3
.layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
.stride(1, 1).build()) //Out: (6-2)/1+1 = 5 -> 5x5x5 .stride(1, 1).build()) //Out: (4-2)/1+1 = 3 -> 3x3x3
.layer(2, new DenseLayer.Builder().nIn(5 * 5 * 5).nOut(4).activation(Activation.TANH).build()) .layer(2, new DenseLayer.Builder().nIn(27).nOut(4).activation(Activation.TANH).build())
.layer(3, new GravesLSTM.Builder().nIn(4).nOut(3).activation(Activation.TANH).build()) .layer(3, new GravesLSTM.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
.layer(4, new RnnOutputLayer.Builder().lossFunction(LossFunction.MCXENT).nIn(3).nOut(nClasses) .layer(4, new RnnOutputLayer.Builder().lossFunction(LossFunction.MCXENT).nIn(3).nOut(nClasses)
.activation(Activation.SOFTMAX).build()) .activation(Activation.SOFTMAX).build())
.setInputType(InputType.convolutional(10, 10, 3)).build(); .setInputType(InputType.convolutional(6, 6, 2)).build();
//Here: ConvolutionLayerSetup in config builder doesn't know that we are expecting time series input, not standard FF input -> override it here //Here: ConvolutionLayerSetup in config builder doesn't know that we are expecting time series input, not standard FF input -> override it here
conf.getInputPreProcessors().put(0, new RnnToCnnPreProcessor(10, 10, 3)); conf.getInputPreProcessors().put(0, new RnnToCnnPreProcessor(6, 6, 2));
MultiLayerNetwork mln = new MultiLayerNetwork(conf); MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init(); mln.init();
@ -489,7 +453,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
} }
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 32);
assertTrue(gradOK); assertTrue(gradOK);
TestUtils.testModelSerialization(mln); TestUtils.testModelSerialization(mln);
} }
View File
@ -68,6 +68,8 @@ public class RnnGradientChecks extends BaseDL4JTest {
for (boolean inputMask : new boolean[]{false, true}) { for (boolean inputMask : new boolean[]{false, true}) {
for (boolean simple : new boolean[]{false, true}) { for (boolean simple : new boolean[]{false, true}) {
for(boolean hasLayerNorm: new boolean[]{true, false}) { for(boolean hasLayerNorm: new boolean[]{true, false}) {
if(!simple && hasLayerNorm)
continue;
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength}); INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
INDArray labels = Nd4j.create(mb, nOut, tsLength); INDArray labels = Nd4j.create(mb, nOut, tsLength);
@ -93,6 +95,11 @@ public class RnnGradientChecks extends BaseDL4JTest {
} }
for (Bidirectional.Mode m : modes) { for (Bidirectional.Mode m : modes) {
//Skip 3 of 4 test cases: from 64 to 16, which still should be good coverage
//Note RNG seed - deterministic run-to-run
if(r.nextInt(4) != 0)
continue;
String name = "mb=" + mb + ", maskType=" + maskType + ", mode=" + m + ", hasLayerNorm=" + hasLayerNorm + ", rnnType=" String name = "mb=" + mb + ", maskType=" + maskType + ", mode=" + m + ", hasLayerNorm=" + hasLayerNorm + ", rnnType="
+ (simple ? "SimpleRnn" : "LSTM"); + (simple ? "SimpleRnn" : "LSTM");
@ -144,6 +151,9 @@ public class RnnGradientChecks extends BaseDL4JTest {
for (boolean inputMask : new boolean[]{false, true}) { for (boolean inputMask : new boolean[]{false, true}) {
for (boolean hasLayerNorm : new boolean[]{true, false}) { for (boolean hasLayerNorm : new boolean[]{true, false}) {
for (int l = 0; l < l1s.length; l++) { for (int l = 0; l < l1s.length; l++) {
//Only run 1 of 5 (on average - note RNG seed for deterministic testing) - 25 of 128 test cases (to minimize test time)
if(r.nextInt(5) != 0)
continue;
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength}); INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
INDArray labels = Nd4j.create(mb, nOut, tsLength); INDArray labels = Nd4j.create(mb, nOut, tsLength);
@ -217,6 +227,8 @@ public class RnnGradientChecks extends BaseDL4JTest {
for (boolean inputMask : new boolean[]{false, true}) { for (boolean inputMask : new boolean[]{false, true}) {
for (boolean simple : new boolean[]{false, true}) { for (boolean simple : new boolean[]{false, true}) {
for (boolean hasLayerNorm : new boolean[]{true, false}) { for (boolean hasLayerNorm : new boolean[]{true, false}) {
if(!simple && hasLayerNorm)
continue;
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength}); INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
@ -265,7 +277,7 @@ public class RnnGradientChecks extends BaseDL4JTest {
net.init(); net.init();
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, true, 16);
assertTrue(name, gradOK); assertTrue(name, gradOK);
TestUtils.testModelSerialization(net); TestUtils.testModelSerialization(net);
} }
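The r.nextInt(4) != 0 and r.nextInt(5) != 0 guards added above thin out the Cartesian product of test settings deterministically: because the Random is seeded, the same subset of combinations is selected on every machine and every build. The pattern in isolation (seed and loop values here are illustrative):

import java.util.Random;

public class SeededCaseSampling {
    public static void main(String[] args) {
        Random r = new Random(12345);                  // fixed seed -> identical subset every run
        int run = 0, total = 0;
        for (int mb : new int[]{1, 3}) {
            for (boolean inputMask : new boolean[]{false, true}) {
                for (boolean simple : new boolean[]{false, true}) {
                    total++;
                    if (r.nextInt(4) != 0) {
                        continue;                      // skip roughly 3 of every 4 combinations
                    }
                    run++;
                    System.out.println("running: mb=" + mb + ", inputMask=" + inputMask
                            + ", simple=" + simple);
                }
            }
        }
        System.out.println(run + " of " + total + " combinations executed");
    }
}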


@ -26,6 +26,7 @@ import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop; import org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.conf.layers.util.MaskLayer; import org.deeplearning4j.nn.conf.layers.util.MaskLayer;
import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@ -60,9 +61,9 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
@Test @Test
public void testMaskLayer() { public void testMaskLayer() {
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
int tsLength = 5; int tsLength = 3;
for(int minibatch : new int[]{1,8}) { for(int minibatch : new int[]{1,3}) {
for (int inputRank : new int[]{2, 3, 4}) { for (int inputRank : new int[]{2, 3, 4}) {
for (boolean inputMask : new boolean[]{false, true}) { for (boolean inputMask : new boolean[]{false, true}) {
String maskType = (inputMask ? "inputMask" : "none"); String maskType = (inputMask ? "inputMask" : "none");
@ -74,7 +75,7 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
if(minibatch == 1){ if(minibatch == 1){
inMask = Nd4j.ones(1,1); inMask = Nd4j.ones(1,1);
} else { } else {
inMask = Nd4j.create(minibatch, 1); inMask = Nd4j.create(DataType.DOUBLE, minibatch, 1);
Nd4j.getExecutioner().exec(new BernoulliDistribution(inMask, 0.5)); Nd4j.getExecutioner().exec(new BernoulliDistribution(inMask, 0.5));
int count = inMask.sumNumber().intValue(); int count = inMask.sumNumber().intValue();
assertTrue(count >= 0 && count <= minibatch); //Sanity check on RNG seed assertTrue(count >= 0 && count <= minibatch); //Sanity check on RNG seed
@ -83,16 +84,16 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
case 4: case 4:
//Per-example mask (broadcast along all channels/x/y) //Per-example mask (broadcast along all channels/x/y)
if(minibatch == 1){ if(minibatch == 1){
inMask = Nd4j.ones(1,1, 1, 1); inMask = Nd4j.ones(DataType.DOUBLE, 1,1, 1, 1);
} else { } else {
inMask = Nd4j.create(minibatch, 1, 1, 1); inMask = Nd4j.create(DataType.DOUBLE, minibatch, 1, 1, 1);
Nd4j.getExecutioner().exec(new BernoulliDistribution(inMask, 0.5)); Nd4j.getExecutioner().exec(new BernoulliDistribution(inMask, 0.5));
int count = inMask.sumNumber().intValue(); int count = inMask.sumNumber().intValue();
assertTrue(count >= 0 && count <= minibatch); //Sanity check on RNG seed assertTrue(count >= 0 && count <= minibatch); //Sanity check on RNG seed
} }
break; break;
case 3: case 3:
inMask = Nd4j.ones(minibatch, tsLength); inMask = Nd4j.ones(DataType.DOUBLE, minibatch, tsLength);
for( int i=0; i<minibatch; i++ ){ for( int i=0; i<minibatch; i++ ){
for( int j=i+1; j<tsLength; j++ ){ for( int j=i+1; j<tsLength; j++ ){
inMask.putScalar(i,j,0.0); inMask.putScalar(i,j,0.0);
@ -108,11 +109,11 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
int[] labelShape; int[] labelShape;
switch (inputRank){ switch (inputRank){
case 2: case 2:
inShape = new int[]{minibatch, 5}; inShape = new int[]{minibatch, 3};
labelShape = inShape; labelShape = inShape;
break; break;
case 3: case 3:
inShape = new int[]{minibatch, 5, tsLength}; inShape = new int[]{minibatch, 3, tsLength};
labelShape = inShape; labelShape = inShape;
break; break;
case 4: case 4:
@ -134,18 +135,18 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
InputType it; InputType it;
switch (inputRank){ switch (inputRank){
case 2: case 2:
l1 = new DenseLayer.Builder().nOut(5).build(); l1 = new DenseLayer.Builder().nOut(3).build();
l2 = new DenseLayer.Builder().nOut(5).build(); l2 = new DenseLayer.Builder().nOut(3).build();
l3 = new OutputLayer.Builder().nOut(5).lossFunction(LossFunctions.LossFunction.MSE) l3 = new OutputLayer.Builder().nOut(3).lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.TANH).build(); .activation(Activation.TANH).build();
it = InputType.feedForward(5); it = InputType.feedForward(3);
break; break;
case 3: case 3:
l1 = new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build(); l1 = new SimpleRnn.Builder().nIn(3).nOut(3).activation(Activation.TANH).build();
l2 = new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build(); l2 = new SimpleRnn.Builder().nIn(3).nOut(3).activation(Activation.TANH).build();
l3 = new RnnOutputLayer.Builder().nIn(5).nOut(5).lossFunction(LossFunctions.LossFunction.SQUARED_LOSS) l3 = new RnnOutputLayer.Builder().nIn(3).nOut(3).lossFunction(LossFunctions.LossFunction.SQUARED_LOSS)
.activation(Activation.IDENTITY).build(); .activation(Activation.IDENTITY).build();
it = InputType.recurrent(5); it = InputType.recurrent(3);
break; break;
case 4: case 4:
l1 = new ConvolutionLayer.Builder().nOut(5).convolutionMode(ConvolutionMode.Truncate) l1 = new ConvolutionLayer.Builder().nOut(5).convolutionMode(ConvolutionMode.Truncate)
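For reference, the per-example time-series mask built above (all ones, then zeroing steps j > i for example i) is triangular: example i keeps only its first i + 1 time steps. A plain-Java illustration of the resulting pattern for minibatch = 3 and tsLength = 3:

import java.util.Arrays;

public class TimeSeriesMaskSketch {
    public static void main(String[] args) {
        int minibatch = 3, tsLength = 3;
        double[][] mask = new double[minibatch][tsLength];
        for (double[] row : mask) {
            Arrays.fill(row, 1.0);                     // start with every step valid
        }
        for (int i = 0; i < minibatch; i++) {
            for (int j = i + 1; j < tsLength; j++) {
                mask[i][j] = 0.0;                      // example i sees only its first i+1 steps
            }
        }
        for (double[] row : mask) {
            System.out.println(Arrays.toString(row));  // [1,0,0], [1,1,0], [1,1,1]
        }
    }
}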


@ -138,28 +138,23 @@ public class VaeGradientCheckTests extends BaseDL4JTest {
@Test @Test
public void testVaePretrain() { public void testVaePretrain() {
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
Activation[] activFns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH}; Activation[] activFns = {Activation.IDENTITY, Activation.TANH, Activation.SOFTSIGN};
Activation[] pzxAfns = {Activation.IDENTITY, Activation.IDENTITY, Activation.TANH};
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MCXENT, LossFunction.MSE, LossFunction.MSE}; Activation[] pxzAfns = {Activation.TANH, Activation.TANH, Activation.IDENTITY};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.SOFTMAX, Activation.TANH, Activation.TANH};
Activation[] pzxAfns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH};
Activation[] pxzAfns = {Activation.TANH, Activation.IDENTITY, Activation.TANH, Activation.TANH};
//use l2vals[i] with l1vals[i] //use l2vals[i] with l1vals[i]
double[] l2vals = {0.4, 0.0, 0.4, 0.4}; double[] l2vals = {0.0, 0.4, 0.4};
double[] l1vals = {0.0, 0.0, 0.5, 0.0}; double[] l1vals = {0.0, 0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; double[] biasL2 = {0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; double[] biasL1 = {0.0, 0.6, 0.0};
int[][] encoderLayerSizes = new int[][] {{5}, {5}, {5, 6}, {5, 6}}; int[][] encoderLayerSizes = new int[][] {{5}, {3, 4}, {3, 4}};
int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}, {6}, {7, 8}}; int[][] decoderLayerSizes = new int[][] {{4}, {2}, {4, 3}};
int[] minibatches = new int[]{1,5,4,3}; int[] minibatches = new int[]{1,3,2,3};
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
for( int i=0; i<activFns.length; i++ ){ for( int i=0; i<activFns.length; i++ ){
LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
double l2 = l2vals[i]; double l2 = l2vals[i];
double l1 = l1vals[i]; double l1 = l1vals[i];
int[] encoderSizes = encoderLayerSizes[i]; int[] encoderSizes = encoderLayerSizes[i];
@ -214,18 +209,18 @@ public class VaeGradientCheckTests extends BaseDL4JTest {
@Test @Test
public void testVaePretrainReconstructionDistributions() { public void testVaePretrainReconstructionDistributions() {
int inOutSize = 6; int inOutSize = 3;
ReconstructionDistribution[] reconstructionDistributions = ReconstructionDistribution[] reconstructionDistributions =
new ReconstructionDistribution[]{new GaussianReconstructionDistribution(Activation.IDENTITY), new ReconstructionDistribution[]{new GaussianReconstructionDistribution(Activation.IDENTITY),
new GaussianReconstructionDistribution(Activation.TANH), new GaussianReconstructionDistribution(Activation.TANH),
new BernoulliReconstructionDistribution(Activation.SIGMOID), new BernoulliReconstructionDistribution(Activation.SIGMOID),
new CompositeReconstructionDistribution.Builder() new CompositeReconstructionDistribution.Builder()
.addDistribution(2, .addDistribution(1,
new GaussianReconstructionDistribution( new GaussianReconstructionDistribution(
Activation.IDENTITY)) Activation.IDENTITY))
.addDistribution(2, new BernoulliReconstructionDistribution()) .addDistribution(1, new BernoulliReconstructionDistribution())
.addDistribution(2, .addDistribution(1,
new GaussianReconstructionDistribution( new GaussianReconstructionDistribution(
Activation.TANH)) Activation.TANH))
.build(), .build(),
@ -248,12 +243,12 @@ public class VaeGradientCheckTests extends BaseDL4JTest {
break; break;
case 3: //Composite case 3: //Composite
data = Nd4j.create(minibatch, inOutSize); data = Nd4j.create(minibatch, inOutSize);
data.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 2)).assign(Nd4j.rand(minibatch, 2)); data.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 1)).assign(Nd4j.rand(minibatch, 1));
Nd4j.getExecutioner() Nd4j.getExecutioner()
.exec(new BernoulliDistribution( .exec(new BernoulliDistribution(
data.get(NDArrayIndex.all(), NDArrayIndex.interval(2, 4)), 0.5), data.get(NDArrayIndex.all(), NDArrayIndex.interval(1, 2)), 0.5),
Nd4j.getRandom()); Nd4j.getRandom());
data.get(NDArrayIndex.all(), NDArrayIndex.interval(4, 6)).assign(Nd4j.rand(minibatch, 2)); data.get(NDArrayIndex.all(), NDArrayIndex.interval(2, 3)).assign(Nd4j.rand(minibatch, 1));
break; break;
case 4: case 4:
case 5: case 5:
@ -269,7 +264,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest {
.seed(12345L).dist(new NormalDistribution(0, 1)) .seed(12345L).dist(new NormalDistribution(0, 1))
.list().layer(0, .list().layer(0,
new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3) new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3)
.encoderLayerSizes(5).decoderLayerSizes(6) .encoderLayerSizes(4).decoderLayerSizes(3)
.pzxActivationFunction(Activation.TANH) .pzxActivationFunction(Activation.TANH)
.reconstructionDistribution( .reconstructionDistribution(
reconstructionDistributions[i]) reconstructionDistributions[i])
@ -304,17 +299,15 @@ public class VaeGradientCheckTests extends BaseDL4JTest {
int minibatch = 2; int minibatch = 2;
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
for (int numSamples : new int[]{1, 3}) { for (int numSamples : new int[]{1, 2}) {
INDArray features = Nd4j.rand(DataType.DOUBLE, minibatch, 4);
// for (int numSamples : new int[]{10}) {
INDArray features = Nd4j.rand(minibatch, 4);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3) MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3)
.dataType(DataType.DOUBLE) .dataType(DataType.DOUBLE)
.updater(new NoOp()) .updater(new NoOp())
.seed(12345L).weightInit(WeightInit.XAVIER).list() .seed(12345L).weightInit(WeightInit.XAVIER).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(5, 6) .layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(2, 3)
.decoderLayerSizes(7, 8).pzxActivationFunction(Activation.TANH) .decoderLayerSizes(4, 3).pzxActivationFunction(Activation.TANH)
.reconstructionDistribution( .reconstructionDistribution(
new GaussianReconstructionDistribution(Activation.TANH)) new GaussianReconstructionDistribution(Activation.TANH))
.numSamples(numSamples).activation(Activation.TANH) .numSamples(numSamples).activation(Activation.TANH)
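With inOutSize reduced to 3, the composite reconstruction distribution above gets one column per component: column 0 for the identity Gaussian, column 1 for the Bernoulli part, column 2 for the tanh Gaussian, matching the interval(0,1), interval(1,2) and interval(2,3) slices used to fill the data. A plain-Java sketch of that layout (illustrative only, not the ND4J API):

import java.util.Arrays;
import java.util.Random;

public class CompositeVaeDataSketch {
    public static void main(String[] args) {
        int minibatch = 2;
        Random r = new Random(12345);
        double[][] data = new double[minibatch][3];
        for (double[] row : data) {
            row[0] = r.nextDouble();                   // Gaussian (identity) column: real-valued
            row[1] = r.nextInt(2);                     // Bernoulli column: 0 or 1
            row[2] = r.nextDouble();                   // Gaussian (tanh) column: real-valued
        }
        for (double[] row : data) {
            System.out.println(Arrays.toString(row));
        }
    }
}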


@ -72,9 +72,6 @@ public class YoloGradientCheckTests extends BaseDL4JTest {
@Test @Test
public void testYoloOutputLayer() { public void testYoloOutputLayer() {
int depthIn = 2; int depthIn = 2;
int[] minibatchSizes = {1, 3};
int[] widths = new int[]{4, 7};
int[] heights = new int[]{4, 5};
int c = 3; int c = 3;
int b = 3; int b = 3;
@ -83,52 +80,51 @@ public class YoloGradientCheckTests extends BaseDL4JTest {
Nd4j.getRandom().setSeed(1234567); Nd4j.getRandom().setSeed(1234567);
int[] minibatchSizes = {1, 3};
int[] widths = new int[]{4, 7};
int[] heights = new int[]{4, 5};
double[] l1 = new double[]{0.0, 0.3}; double[] l1 = new double[]{0.0, 0.3};
double[] l2 = new double[]{0.0, 0.4}; double[] l2 = new double[]{0.0, 0.4};
for( int wh = 0; wh<widths.length; wh++ ) { for( int i = 0; i<widths.length; i++ ) {
int w = widths[wh]; int w = widths[i];
int h = heights[wh]; int h = heights[i];
int mb = minibatchSizes[i];
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
INDArray bbPrior = Nd4j.rand(b, 2).muliRowVector(Nd4j.create(new double[]{w, h})).addi(0.1); INDArray bbPrior = Nd4j.rand(b, 2).muliRowVector(Nd4j.create(new double[]{w, h})).addi(0.1);
for (int mb : minibatchSizes) { Nd4j.getRandom().setSeed(12345);
for (int i = 0; i < l1.length; i++) {
Nd4j.getRandom().setSeed(12345); INDArray input = Nd4j.rand(new int[]{mb, depthIn, h, w});
INDArray labels = yoloLabels(mb, c, h, w);
INDArray input = Nd4j.rand(new int[]{mb, depthIn, h, w}); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
INDArray labels = yoloLabels(mb, c, h, w); .dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(a)
.l1(l1[i]).l2(l2[i])
.convolutionMode(ConvolutionMode.Same)
.list()
.layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1)
.nIn(depthIn).nOut(yoloDepth).build())//output: (5-2+0)/1+1 = 4
.layer(new Yolo2OutputLayer.Builder()
.boundingBoxPriors(bbPrior)
.build())
.build();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) MultiLayerNetwork net = new MultiLayerNetwork(conf);
.dataType(DataType.DOUBLE) net.init();
.updater(new NoOp())
.activation(a)
.l1(l1[i]).l2(l2[i])
.convolutionMode(ConvolutionMode.Same)
.list()
.layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1)
.nIn(depthIn).nOut(yoloDepth).build())//output: (5-2+0)/1+1 = 4
.layer(new Yolo2OutputLayer.Builder()
.boundingBoxPriors(bbPrior)
.build())
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf); String msg = "testYoloOutputLayer() - minibatch = " + mb + ", w=" + w + ", h=" + h + ", l1=" + l1[i] + ", l2=" + l2[i];
net.init(); System.out.println(msg);
String msg = "testYoloOutputLayer() - minibatch = " + mb + ", w=" + w + ", h=" + h + ", l1=" + l1[i] + ", l2=" + l2[i]; boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
System.out.println(msg); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 100);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, assertTrue(msg, gradOK);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); TestUtils.testModelSerialization(net);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
} }
} }
@ -233,7 +229,7 @@ public class YoloGradientCheckTests extends BaseDL4JTest {
INDArray l = ds.getLabels(); INDArray l = ds.getLabels();
boolean ok = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean ok = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, f, l); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, f, l, null, null, true, 64);
assertTrue(ok); assertTrue(ok);
TestUtils.testModelSerialization(net); TestUtils.testModelSerialization(net);
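The rewrite above replaces nested loops over widths/heights, minibatch sizes and l1/l2 settings with a single loop over index-aligned arrays, so two hand-picked configurations run instead of the eight the old nesting produced. The pattern on its own (values copied from the test, class name illustrative):

public class IndexAlignedSettings {
    public static void main(String[] args) {
        int[] widths    = {4, 7};
        int[] heights   = {4, 5};
        int[] minibatch = {1, 3};
        double[] l1     = {0.0, 0.3};
        double[] l2     = {0.0, 0.4};
        // 2 cases instead of the 2 * 2 * 2 = 8 combinations of the old nested loops
        for (int i = 0; i < widths.length; i++) {
            System.out.printf("case %d: w=%d, h=%d, mb=%d, l1=%.1f, l2=%.1f%n",
                    i, widths[i], heights[i], minibatch[i], l1[i], l2[i]);
        }
    }
}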


@ -446,7 +446,7 @@ public class DTypeTests extends BaseDL4JTest {
.layer(new ActivationLayer(Activation.LEAKYRELU)) .layer(new ActivationLayer(Activation.LEAKYRELU))
.layer(secondLast) .layer(secondLast)
.layer(ol) .layer(ol)
.setInputType(InputType.convolutionalFlat(28, 28, 1)) .setInputType(InputType.convolutionalFlat(8, 8, 1))
.build(); .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf); MultiLayerNetwork net = new MultiLayerNetwork(conf);
@ -457,16 +457,16 @@ public class DTypeTests extends BaseDL4JTest {
assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType()); assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType());
assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType()); assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType());
INDArray in = Nd4j.rand(networkDtype, 2, 28 * 28); INDArray in = Nd4j.rand(networkDtype, 2, 8 * 8);
INDArray label; INDArray label;
if (outputLayer < 3) { if (outputLayer < 3) {
label = TestUtils.randomOneHot(2, 10).castTo(networkDtype); label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
} else if (outputLayer == 3) { } else if (outputLayer == 3) {
//CNN loss //CNN loss
label = Nd4j.rand(networkDtype, 2, 3, 28, 28); label = Nd4j.rand(networkDtype, 2, 3, 8, 8);
} else if (outputLayer == 4) { } else if (outputLayer == 4) {
//YOLO //YOLO
label = Nd4j.ones(networkDtype, 2, 6, 28, 28); label = Nd4j.ones(networkDtype, 2, 6, 8, 8);
} else { } else {
throw new IllegalStateException(); throw new IllegalStateException();
} }
@ -550,7 +550,7 @@ public class DTypeTests extends BaseDL4JTest {
.layer(new Upsampling3D.Builder().size(2).build()) .layer(new Upsampling3D.Builder().size(2).build())
.layer(secondLast) .layer(secondLast)
.layer(ol) .layer(ol)
.setInputType(InputType.convolutional3D(Convolution3D.DataFormat.NCDHW, 28, 28, 28, 1)) .setInputType(InputType.convolutional3D(Convolution3D.DataFormat.NCDHW, 8, 8, 8, 1))
.build(); .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf); MultiLayerNetwork net = new MultiLayerNetwork(conf);
@ -561,13 +561,13 @@ public class DTypeTests extends BaseDL4JTest {
assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType()); assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType());
assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType()); assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType());
INDArray in = Nd4j.rand(networkDtype, 2, 1, 28, 28, 28); INDArray in = Nd4j.rand(networkDtype, 2, 1, 8, 8, 8);
INDArray label; INDArray label;
if (outputLayer == 0) { if (outputLayer == 0) {
label = TestUtils.randomOneHot(2, 10).castTo(networkDtype); label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
} else if (outputLayer == 1) { } else if (outputLayer == 1) {
//CNN3D loss //CNN3D loss
label = Nd4j.rand(networkDtype, 2, 3, 28, 28, 28); label = Nd4j.rand(networkDtype, 2, 3, 8, 8, 8);
} else if (outputLayer == 2) { } else if (outputLayer == 2) {
label = TestUtils.randomOneHot(2, 10).castTo(networkDtype); label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
} else { } else {
@ -787,15 +787,15 @@ public class DTypeTests extends BaseDL4JTest {
switch (outputLayer) { switch (outputLayer) {
case 0: case 0:
ol = new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); ol = new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build();
secondLast = new LSTM.Builder().nOut(5).activation(Activation.TANH).build(); secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build();
break; break;
case 1: case 1:
ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build();
secondLast = new LSTM.Builder().nOut(5).activation(Activation.TANH).build(); secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build();
break; break;
case 2: case 2:
ol = new OutputLayer.Builder().nOut(5).build(); ol = new OutputLayer.Builder().nOut(5).build();
secondLast = new LastTimeStep(new LSTM.Builder().nOut(5).activation(Activation.TANH).build()); secondLast = new LastTimeStep(new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build());
break; break;
default: default:
throw new RuntimeException(); throw new RuntimeException();
@ -825,12 +825,12 @@ public class DTypeTests extends BaseDL4JTest {
assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType()); assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType());
assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType()); assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType());
INDArray in = Nd4j.rand(networkDtype, 2, 5, 4); INDArray in = Nd4j.rand(networkDtype, 2, 5, 2);
INDArray label; INDArray label;
if (outputLayer == 2) { if (outputLayer == 2) {
label = TestUtils.randomOneHot(2, 5).castTo(networkDtype); label = TestUtils.randomOneHot(2, 5).castTo(networkDtype);
} else { } else {
label = TestUtils.randomOneHotTimeSeries(2, 5, 4).castTo(networkDtype); label = TestUtils.randomOneHotTimeSeries(2, 5, 2).castTo(networkDtype);
} }
@ -845,7 +845,7 @@ public class DTypeTests extends BaseDL4JTest {
net.setLabels(label); net.setLabels(label);
net.computeGradientAndScore(); net.computeGradientAndScore();
net.fit(new DataSet(in, label, Nd4j.ones(networkDtype, 2, 4), outputLayer == 2 ? null : Nd4j.ones(networkDtype, 2, 4))); net.fit(new DataSet(in, label, Nd4j.ones(networkDtype, 2, 2), outputLayer == 2 ? null : Nd4j.ones(networkDtype, 2, 2)));
logUsedClasses(net); logUsedClasses(net);
@ -1219,9 +1219,9 @@ public class DTypeTests extends BaseDL4JTest {
.addLayer("2", new LocallyConnected1D.Builder().kernelSize(2).nOut(4).build(), "1") .addLayer("2", new LocallyConnected1D.Builder().kernelSize(2).nOut(4).build(), "1")
.addLayer("out", new RnnOutputLayer.Builder().nOut(10).build(), "2") .addLayer("out", new RnnOutputLayer.Builder().nOut(10).build(), "2")
.setOutputs("out") .setOutputs("out")
.setInputTypes(InputType.recurrent(5, 4)); .setInputTypes(InputType.recurrent(5, 2));
in = new INDArray[]{Nd4j.rand(networkDtype, 2, 5, 4)}; in = new INDArray[]{Nd4j.rand(networkDtype, 2, 5, 2)};
label = TestUtils.randomOneHotTimeSeries(2, 10, 4); label = TestUtils.randomOneHotTimeSeries(2, 10, 2);
break; break;
case 1: case 1:
b.addInputs("in") b.addInputs("in")
@ -1229,8 +1229,8 @@ public class DTypeTests extends BaseDL4JTest {
.addLayer("2", new LocallyConnected2D.Builder().kernelSize(2, 2).nOut(5).build(), "1") .addLayer("2", new LocallyConnected2D.Builder().kernelSize(2, 2).nOut(5).build(), "1")
.addLayer("out", new OutputLayer.Builder().nOut(10).build(), "2") .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "2")
.setOutputs("out") .setOutputs("out")
.setInputTypes(InputType.convolutional(28, 28, 1)); .setInputTypes(InputType.convolutional(8, 8, 1));
in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 28, 28)}; in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 8, 8)};
label = TestUtils.randomOneHot(2, 10).castTo(networkDtype); label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
break; break;
default: default:
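Rough cost argument for the 28 -> 8 shrink of input sizes in these tests: activation and gradient work in the convolutional parts grows with the number of spatial positions, so each forward/backward pass touches roughly 12x fewer values per channel in 2D and roughly 43x fewer in 3D.

public class ShrinkFactor {
    public static void main(String[] args) {
        System.out.println((28.0 * 28.0) / (8.0 * 8.0));               // 2D: 784 / 64 = 12.25
        System.out.println((28.0 * 28.0 * 28.0) / (8.0 * 8.0 * 8.0));  // 3D: 21952 / 512 = 42.875
    }
}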


@ -31,6 +31,7 @@ import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.LossLayer; import org.deeplearning4j.nn.conf.layers.LossLayer;
import org.deeplearning4j.nn.conf.layers.PrimaryCapsules; import org.deeplearning4j.nn.conf.layers.PrimaryCapsules;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
import org.nd4j.evaluation.classification.Evaluation; import org.nd4j.evaluation.classification.Evaluation;
import org.nd4j.linalg.activations.impl.ActivationSoftmax; import org.nd4j.linalg.activations.impl.ActivationSoftmax;
@ -38,6 +39,7 @@ import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.learning.config.Adam; import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood;
@Ignore("AB - ignored due to excessive runtime. Keep for manual debugging when required")
public class CapsNetMNISTTest extends BaseDL4JTest { public class CapsNetMNISTTest extends BaseDL4JTest {
@Override @Override


@ -95,7 +95,7 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest {
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4)
.updater(new Nesterovs(0.9)).dropOut(0.5) .updater(new Nesterovs(0.9)).dropOut(0.5)
.list() .list()
.layer(new LocallyConnected1D.Builder().kernelSize(8).nIn(3) .layer(new LocallyConnected1D.Builder().kernelSize(4).nIn(3)
.stride(1).nOut(16).dropOut(0.5) .stride(1).nOut(16).dropOut(0.5)
.convolutionMode(ConvolutionMode.Strict) .convolutionMode(ConvolutionMode.Strict)
.setInputSize(28) .setInputSize(28)
@ -104,19 +104,19 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest {
.build()) .build())
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer .layer(new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer
.nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) .nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build())
.setInputType(InputType.recurrent(3, 28)); .setInputType(InputType.recurrent(3, 8));
MultiLayerConfiguration conf = builder.build(); MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork network = new MultiLayerNetwork(conf); MultiLayerNetwork network = new MultiLayerNetwork(conf);
network.init(); network.init();
INDArray input = Nd4j.ones(10, 3, 28); INDArray input = Nd4j.ones(10, 3, 8);
INDArray output = network.output(input, false);; INDArray output = network.output(input, false);;
for (int i = 0; i < 100; i++) { // TODO: this falls flat for 1000 iterations on my machine for (int i = 0; i < 100; i++) { // TODO: this falls flat for 1000 iterations on my machine
output = network.output(input, false); output = network.output(input, false);
} }
assertArrayEquals(new long[] {(28 - 8 + 1) * 10, 10}, output.shape()); assertArrayEquals(new long[] {(8 - 4 + 1) * 10, 10}, output.shape());
network.fit(input, output); network.fit(input, output);
} }
@ -159,8 +159,10 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest {
.addLayer("2", new LocallyConnected2D.Builder().kernelSize(2,2).nOut(5).build(), "1") .addLayer("2", new LocallyConnected2D.Builder().kernelSize(2,2).nOut(5).build(), "1")
.addLayer("out", new OutputLayer.Builder().nOut(10).build(), "2") .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "2")
.setOutputs("out") .setOutputs("out")
.setInputTypes(InputType.convolutional(28, 28, 1)); // .setInputTypes(InputType.convolutional(28, 28, 1));
in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 28, 28)}; // in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 28, 28)};
.setInputTypes(InputType.convolutional(8, 8, 1));
in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 8, 8)};
label = TestUtils.randomOneHot(2, 10).castTo(networkDtype); label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
break; break;
default: default:


@ -93,8 +93,6 @@ public class TestSameDiffConv extends BaseDL4JTest {
//Note: to avoid the exponential number of tests here, we'll randomly run every Nth test only. //Note: to avoid the exponential number of tests here, we'll randomly run every Nth test only.
//With n=1, m=3 this is 1 out of every 3 tests (on average) //With n=1, m=3 this is 1 out of every 3 tests (on average)
Random r = new Random(12345); Random r = new Random(12345);
int n = 1;
int m = 30; //1 out of every 30... 3888 possible combinations here
for (int minibatch : new int[]{5, 1}) { for (int minibatch : new int[]{5, 1}) {
Activation[] afns = new Activation[]{ Activation[] afns = new Activation[]{
@ -117,11 +115,8 @@ public class TestSameDiffConv extends BaseDL4JTest {
for (int[] dilation : new int[][]{{1, 1}, {2, 2}, {1, 2}}) { for (int[] dilation : new int[][]{{1, 1}, {2, 2}, {1, 2}}) {
for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}) { for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}) {
for (Activation a : afns) { for (Activation a : afns) {
int i = r.nextInt(m); if(r.nextInt(80) != 0)
if (i >= n) { continue; //1 of 80 on average - of 3888 possible combinations here -> ~49 tests
//Example: n=2, m=3... skip on i=2, run test on i=0, i=1
continue;
}
String msg = "Test " + (count++) + " - minibatch=" + minibatch + ", nIn=" + nIn String msg = "Test " + (count++) + " - minibatch=" + minibatch + ", nIn=" + nIn
+ ", nOut=" + nOut + ", kernel=" + Arrays.toString(kernel) + ", stride=" + ", nOut=" + nOut + ", kernel=" + Arrays.toString(kernel) + ", stride="
@ -306,7 +301,7 @@ public class TestSameDiffConv extends BaseDL4JTest {
log.info("Starting: " + msg); log.info("Starting: " + msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, f, l); DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, f, l, null, null, true, 50); //Most of weights are in output layer
assertTrue(msg, gradOK); assertTrue(msg, gradOK);
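Quick check on the "~49 tests" figure in the comment above: each of the 3888 possible combinations runs only when r.nextInt(80) == 0, so the expected number executed is 3888 / 80.

public class ExpectedSampledTests {
    public static void main(String[] args) {
        int combinations = 3888;               // total quoted in the test comment
        double p = 1.0 / 80.0;                 // probability a given combination runs
        System.out.println(combinations * p);  // 48.6, i.e. roughly 49 tests on average
    }
}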


@ -100,7 +100,7 @@ public class TestOptimizers extends BaseDL4JTest {
ds.normalizeZeroMeanZeroUnitVariance(); ds.normalizeZeroMeanZeroUnitVariance();
for (OptimizationAlgorithm oa : toTest) { for (OptimizationAlgorithm oa : toTest) {
int nIter = 10; int nIter = 5;
MultiLayerNetwork network = new MultiLayerNetwork(getMLPConfigIris(oa)); MultiLayerNetwork network = new MultiLayerNetwork(getMLPConfigIris(oa));
network.init(); network.init();
double score = network.score(ds); double score = network.score(ds);
@ -109,7 +109,7 @@ public class TestOptimizers extends BaseDL4JTest {
if (PRINT_OPT_RESULTS) if (PRINT_OPT_RESULTS)
System.out.println("testOptimizersMLP() - " + oa); System.out.println("testOptimizersMLP() - " + oa);
int nCallsToOptimizer = 30; int nCallsToOptimizer = 10;
double[] scores = new double[nCallsToOptimizer + 1]; double[] scores = new double[nCallsToOptimizer + 1];
scores[0] = score; scores[0] = score;
for (int i = 0; i < nCallsToOptimizer; i++) { for (int i = 0; i < nCallsToOptimizer; i++) {
@ -256,34 +256,6 @@ public class TestOptimizers extends BaseDL4JTest {
} }
} }
@Test
public void testSphereFnOptStochGradDescentMultipleSteps() {
//Earlier tests: only do a single line search, though each line search will do multiple iterations
// of line search algorithm.
//Here, do multiple optimization runs + multiple line search iterations within each run
//i.e., gradient is re-calculated at each step/run
//Single step tests earlier won't test storing of state between iterations
testSphereFnMultipleStepsHelper(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT, 100, 5);
}
@Test
public void testSphereFnOptLineGradDescentMultipleSteps() {
testSphereFnMultipleStepsHelper(OptimizationAlgorithm.LINE_GRADIENT_DESCENT, 100, 5);
}
@Test
public void testSphereFnOptCGMultipleSteps() {
testSphereFnMultipleStepsHelper(OptimizationAlgorithm.CONJUGATE_GRADIENT, 100, 5);
}
@Test
public void testSphereFnOptLBFGSMultipleSteps() {
testSphereFnMultipleStepsHelper(OptimizationAlgorithm.LBFGS, 100, 5);
}
private static void testSphereFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter, private static void testSphereFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
int maxNumLineSearchIter) { int maxNumLineSearchIter) {
double[] scores = new double[nOptIter + 1]; double[] scores = new double[nOptIter + 1];


@ -58,8 +58,8 @@ public class ValidateCuDNN extends BaseDL4JTest {
int numClasses = 10; int numClasses = 10;
//imageHeight,imageWidth,channels //imageHeight,imageWidth,channels
int imageHeight = 240; int imageHeight = 64;
int imageWidth = 240; int imageWidth = 64;
int channels = 3; int channels = 3;
IActivation activation = new ActivationIdentity(); IActivation activation = new ActivationIdentity();
MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder() MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder()
@ -68,9 +68,9 @@ public class ValidateCuDNN extends BaseDL4JTest {
.activation(new ActivationELU()) .activation(new ActivationELU())
.updater(new Nesterovs(1e-3, 0.9)) .updater(new Nesterovs(1e-3, 0.9))
.list( .list(
new Convolution2D.Builder().nOut(96) new Convolution2D.Builder().nOut(16)
.kernelSize(11, 11).biasInit(0.0) .kernelSize(4, 4).biasInit(0.0)
.stride(4, 4).build(), .stride(2, 2).build(),
new ActivationLayer.Builder().activation(activation).build(), new ActivationLayer.Builder().activation(activation).build(),
new Pooling2D.Builder() new Pooling2D.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX) .poolingType(SubsamplingLayer.PoolingType.MAX)
@ -85,12 +85,12 @@ public class ValidateCuDNN extends BaseDL4JTest {
.poolingType(SubsamplingLayer.PoolingType.MAX) .poolingType(SubsamplingLayer.PoolingType.MAX)
.kernelSize(3, 3).stride(2, 2) .kernelSize(3, 3).stride(2, 2)
.build(), .build(),
new Convolution2D.Builder().nOut(384) new Convolution2D.Builder().nOut(16)
.kernelSize(3, 3).padding(1, 1) .kernelSize(3, 3).padding(1, 1)
.biasInit(0.0) .biasInit(0.0)
.stride(1, 1).build(), .stride(1, 1).build(),
new ActivationLayer.Builder().activation(activation).build(), new ActivationLayer.Builder().activation(activation).build(),
new Convolution2D.Builder().nOut(256) new Convolution2D.Builder().nOut(16)
.kernelSize(3, 3).padding(1, 1) .kernelSize(3, 3).padding(1, 1)
.stride(1, 1).build(), .stride(1, 1).build(),
new ActivationLayer.Builder().activation(activation).build(), new ActivationLayer.Builder().activation(activation).build(),
@ -99,7 +99,7 @@ public class ValidateCuDNN extends BaseDL4JTest {
.kernelSize(3, 3).stride(2, 2) .kernelSize(3, 3).stride(2, 2)
.build(), .build(),
new DenseLayer.Builder() new DenseLayer.Builder()
.nOut(4096) .nOut(64)
.biasInit(0.0) .biasInit(0.0)
.build(), .build(),
new ActivationLayer.Builder().activation(activation).build(), new ActivationLayer.Builder().activation(activation).build(),
@ -114,8 +114,8 @@ public class ValidateCuDNN extends BaseDL4JTest {
MultiLayerNetwork net = new MultiLayerNetwork(multiLayerConfiguration); MultiLayerNetwork net = new MultiLayerNetwork(multiLayerConfiguration);
net.init(); net.init();
int[] fShape = new int[]{32, channels, imageHeight, imageWidth}; int[] fShape = new int[]{8, channels, imageHeight, imageWidth};
int[] lShape = new int[]{32, numClasses}; int[] lShape = new int[]{8, numClasses};
List<Class<?>> classesToTest = new ArrayList<>(); List<Class<?>> classesToTest = new ArrayList<>();
classesToTest.add(ConvolutionLayer.class); classesToTest.add(ConvolutionLayer.class);
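For the shrunken network above, the standard no-padding convolution size formula, out = (in - kernel) / stride + 1, is what keeps the 64x64 input workable through the reduced layer stack. A small sketch, assuming zero padding and truncate-style convolution since the visible configuration does not set either:

public class ConvOutSize {
    // out = (in - kernel) / stride + 1 for a valid (no padding) convolution
    static int out(int in, int kernel, int stride) {
        return (in - kernel) / stride + 1;
    }
    public static void main(String[] args) {
        System.out.println(out(64, 4, 2));    // new first conv layer: 64x64 -> 31x31
        System.out.println(out(240, 11, 4));  // old first conv layer: 240x240 -> 58x58
    }
}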


@ -144,12 +144,6 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
// (a) activation function // (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations) // (c) Loss function (with specified output activations)
Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
LossFunctions.LossFunction[] lossFunctions =
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
DataSet ds = new IrisDataSetIterator(150, 150).next(); DataSet ds = new IrisDataSetIterator(150, 150).next();
ds.normalizeZeroMeanZeroUnitVariance(); ds.normalizeZeroMeanZeroUnitVariance();
@ -161,73 +155,74 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
double[] l1vals = {0.0, 0.0, 0.5, 0.0}; double[] l1vals = {0.0, 0.0, 0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first
for (Activation afn : activFns) { LossFunctions.LossFunction[] lossFunctions =
for (boolean doLearningFirst : characteristic) { {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
for (int i = 0; i < lossFunctions.length; i++) { Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here
for (int k = 0; k < l2vals.length; k++) {
LossFunctions.LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() for( int i=0; i<l2vals.length; i++ ){
.dataType(DataType.DOUBLE) Activation afn = activFns[i];
.l2(l2).l1(l1).l2Bias(biasL2[k]).l1Bias(biasL1[k]) boolean doLearningFirst = characteristic[i];
.optimizationAlgo( LossFunctions.LossFunction lf = lossFunctions[i];
OptimizationAlgorithm.CONJUGATE_GRADIENT) Activation outputActivation = outputActivations[i];
.seed(12345L).list() double l2 = l2vals[i];
.layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6) double l1 = l1vals[i];
.cudnnAllowFallback(false)
.weightInit(WeightInit.XAVIER).activation(afn)
.updater(new NoOp()).build())
.layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
.weightInit(WeightInit.XAVIER).updater(new NoOp()).build())
.setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
.optimizationAlgo(
OptimizationAlgorithm.CONJUGATE_GRADIENT)
.seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
.weightInit(WeightInit.XAVIER).activation(afn)
.updater(new NoOp()).build())
.layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
.weightInit(WeightInit.XAVIER).updater(new NoOp()).build())
MultiLayerConfiguration conf = builder.build(); .setInputType(InputType.convolutionalFlat(1, 4, 1));
MultiLayerNetwork mln = new MultiLayerNetwork(conf); MultiLayerConfiguration conf = builder.build();
mln.init();
String testName = new Object() {
}.getClass().getEnclosingMethod().getName();
if (doLearningFirst) { MultiLayerNetwork mln = new MultiLayerNetwork(conf);
//Run a number of iterations of learning mln.init();
mln.setInput(ds.getFeatures()); String testName = new Object() {
mln.setLabels(ds.getLabels()); }.getClass().getEnclosingMethod().getName();
mln.computeGradientAndScore();
double scoreBefore = mln.score();
for (int j = 0; j < 10; j++)
mln.fit(ds);
mln.computeGradientAndScore();
double scoreAfter = mln.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = testName
+ "- score did not (sufficiently) decrease during learning - activationFn="
+ afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
+ ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
+ ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
}
if (PRINT_RESULTS) { if (doLearningFirst) {
System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf //Run a number of iterations of learning
+ ", outputActivation=" + outputActivation + ", doLearningFirst=" mln.setInput(ds.getFeatures());
+ doLearningFirst); mln.setLabels(ds.getLabels());
for (int j = 0; j < mln.getnLayers(); j++) mln.computeGradientAndScore();
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); double scoreBefore = mln.score();
} for (int j = 0; j < 10; j++)
mln.fit(ds);
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, mln.computeGradientAndScore();
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); double scoreAfter = mln.score();
//Can't test in 'characteristic mode of operation' if not learning
assertTrue(gradOK); String msg = testName
TestUtils.testModelSerialization(mln); + "- score did not (sufficiently) decrease during learning - activationFn="
} + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
} + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
+ ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
} }
if (PRINT_RESULTS) {
System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", doLearningFirst="
+ doLearningFirst);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
TestUtils.testModelSerialization(mln);
} }
} }
@ -375,57 +370,43 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int[] padding = {0, 0}; int[] padding = {0, 0};
int size = 2; int size = 2;
String[] activations = {"sigmoid", "tanh"}; for (int minibatchSize : minibatchSizes) {
SubsamplingLayer.PoolingType[] poolingTypes = INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};
for (String afn : activations) { MultiLayerConfiguration conf =
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { new NeuralNetConfiguration.Builder()
for (int minibatchSize : minibatchSizes) { .dataType(DataType.DOUBLE)
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); .updater(new NoOp())
INDArray labels = Nd4j.zeros(minibatchSize, nOut); .dist(new NormalDistribution(0, 1))
for (int i = 0; i < minibatchSize; i++) { .list().layer(new ConvolutionLayer.Builder(kernel,
labels.putScalar(new int[]{i, i % nOut}, 1.0); stride, padding).nIn(inputDepth)
} .nOut(3).build())//output: (5-2+0)/1+1 = 4
.layer(new Upsampling2D.Builder().size(size).build()) //output: 4*2 =8 -> 8x8x3
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(8 * 8 * 3)
.nOut(4).build())
.setInputType(InputType.convolutionalFlat(height, width,
inputDepth))
.build();
MultiLayerConfiguration conf = MultiLayerNetwork net = new MultiLayerNetwork(conf);
new NeuralNetConfiguration.Builder() net.init();
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.list().layer(new ConvolutionLayer.Builder(kernel,
stride, padding).nIn(inputDepth)
.cudnnAllowFallback(false)
.nOut(3).build())//output: (5-2+0)/1+1 = 4
.layer(new Upsampling2D.Builder().size(size).build()) //output: 4*2 =8 -> 8x8x3
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(8 * 8 * 3)
.nOut(4).build())
.setInputType(InputType.convolutionalFlat(height, width,
inputDepth))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf); String msg = "Upsampling - minibatch=" + minibatchSize;
net.init();
String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" if (PRINT_RESULTS) {
+ afn; System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
if (PRINT_RESULTS) { System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
} }
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
} }
} }
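TestUtils.randomOneHot, which replaces the manual putScalar label loops in this file, presumably just produces a matrix with a single 1 in each row (the old loops placed it deterministically at column i % nOut; the helper's name suggests a random column). A plain-Java equivalent, for illustration only:

import java.util.Arrays;
import java.util.Random;

public class OneHotLabelsSketch {
    static double[][] randomOneHot(int examples, int nClasses, long seed) {
        Random r = new Random(seed);
        double[][] labels = new double[examples][nClasses];
        for (int i = 0; i < examples; i++) {
            labels[i][r.nextInt(nClasses)] = 1.0;   // exactly one active class per example
        }
        return labels;
    }
    public static void main(String[] args) {
        for (double[] row : randomOneHot(3, 4, 12345)) {
            System.out.println(Arrays.toString(row));
        }
    }
}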
@ -646,63 +627,56 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
public void testCnnSamePaddingMode() { public void testCnnSamePaddingMode() {
int nOut = 2; int nOut = 2;
int[] minibatchSizes = {1, 3}; int[] minibatchSizes = {1, 3, 3, 2, 1, 2};
int[] heights = new int[]{4, 5, 6, 5, 4, 4}; //Same padding mode: insensitive to exact input size...
int[] kernelSizes = new int[]{2, 3, 2, 3, 2, 3};
int[] inputDepths = {1, 2, 4, 3, 2, 3};
int width = 5; int width = 5;
int[] heights = new int[]{4, 5, 6}; //Same padding mode: insensitive to exact input size...
int[] kernelSizes = new int[]{2, 3};
int[] inputDepths = {1, 2, 4};
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
for (int inputDepth : inputDepths) { for( int i=0; i<minibatchSizes.length; i++ ){
for (int minibatchSize : minibatchSizes) { int inputDepth = inputDepths[i];
for (int height : heights) { int minibatchSize = minibatchSizes[i];
for (int k : kernelSizes) { int height = heights[i];
int k = kernelSizes[i];
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
INDArray labels = Nd4j.zeros(minibatchSize, nOut); INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE) .dataType(DataType.DOUBLE)
.updater(new NoOp()) .updater(new NoOp())
.activation(Activation.TANH).convolutionMode(Same).list() .activation(Activation.TANH).convolutionMode(Same).list()
.layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k) .layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k)
.cudnnAllowFallback(false) .stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build())
.stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build()) .layer(1, new SubsamplingLayer.Builder()
.layer(1, new SubsamplingLayer.Builder() .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k)
.poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k) .stride(1, 1).padding(0, 0).build())
.cudnnAllowFallback(false) .layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k)
.stride(1, 1).padding(0, 0).build()) .stride(1, 1).padding(0, 0).build())
.layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.cudnnAllowFallback(false) .activation(Activation.SOFTMAX).nOut(nOut).build())
.stride(1, 1).padding(0, 0).build()) .setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build();
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf); MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init(); net.init();
for (int i = 0; i < net.getLayers().length; i++) { for (int j = 0; j < net.getLayers().length; j++) {
System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams()); System.out.println("nParams, layer " + j + ": " + net.getLayer(j).numParams());
}
String msg = "Minibatch=" + minibatchSize + ", inDepth=" + inputDepth + ", height=" + height
+ ", width=" + width + ", kernelSize=" + k;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
} }
String msg = "Minibatch=" + minibatchSize + ", inDepth=" + inputDepth + ", height=" + height
+ ", kernelSize=" + k;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
} }
} }
@ -732,12 +706,10 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
} }
Layer convLayer = new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k) Layer convLayer = new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k)
.cudnnAllowFallback(false)
.stride(stride, stride).padding(0, 0).nIn(inputDepth).nOut(2).build(); .stride(stride, stride).padding(0, 0).nIn(inputDepth).nOut(2).build();
Layer poolLayer = new SubsamplingLayer.Builder() Layer poolLayer = new SubsamplingLayer.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k) .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k)
.cudnnAllowFallback(false)
.stride(stride, stride).padding(0, 0).build(); .stride(stride, stride).padding(0, 0).build();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
@ -765,7 +737,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
labels); labels, null, null, true, 128);
assertTrue(msg, gradOK); assertTrue(msg, gradOK);
@ -783,69 +755,66 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
Nd4j.getRandom().setSeed(12345); Nd4j.getRandom().setSeed(12345);
int nOut = 4; int nOut = 4;
int[] minibatchSizes = {1, 3};
int width = 6; int width = 6;
        int height = 6;
        int[] kernel = {2, 2};
        int[] stride = {1, 1};
        int[] padding = {0, 0};
        int[] minibatchSizes = {1, 3, 2};
        int[] inputDepths = {1, 3, 2};
        int[][] zeroPadLayer = new int[][]{{0, 0, 0, 0}, {1, 1, 0, 0}, {2, 2, 2, 2}};

        for( int i=0; i<minibatchSizes.length; i++ ){
            int minibatchSize = minibatchSizes[i];
            int inputDepth = inputDepths[i];
            int[] zeroPad = zeroPadLayer[i];

            INDArray input = Nd4j.rand(DataType.DOUBLE, new int[]{minibatchSize, inputDepth, height, width});
            INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);

            MultiLayerConfiguration conf =
                    new NeuralNetConfiguration.Builder().updater(new NoOp())
                            .dataType(DataType.DOUBLE)
                            .dist(new NormalDistribution(0, 1)).list()
                            .layer(0, new ConvolutionLayer.Builder(kernel, stride, padding)
                                    .nIn(inputDepth).nOut(3).build())   //output: (6-2+0)/1+1 = 5
                            .layer(1, new ZeroPaddingLayer.Builder(zeroPad).build())
                            .layer(2, new ConvolutionLayer.Builder(kernel, stride, padding)
                                    .nIn(3).nOut(3).build())            //output: (6-2+0)/1+1 = 5
                            .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX).nOut(4).build())
                            .setInputType(InputType.convolutional(height, width, inputDepth))
                            .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            //Check zero padding activation shape
            org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer zpl =
                    (org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer) net.getLayer(1);
            val expShape = new long[]{minibatchSize, inputDepth, height + zeroPad[0] + zeroPad[1],
                    width + zeroPad[2] + zeroPad[3]};
            INDArray out = zpl.activate(input, false, LayerWorkspaceMgr.noWorkspaces());
            assertArrayEquals(expShape, out.shape());

            String msg = "minibatch=" + minibatchSize + ", channels=" + inputDepth + ", zeroPad = "
                    + Arrays.toString(zeroPad);

            if (PRINT_RESULTS) {
                System.out.println(msg);
                for (int j = 0; j < net.getnLayers(); j++)
                    System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
            }

            boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

            assertTrue(msg, gradOK);

            TestUtils.testModelSerialization(net);
        }
    }
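Note: several of the reworked tests above replace a hand-rolled label-building loop with TestUtils.randomOneHot(minibatchSize, nOut). The sketch below is only an illustration of what such a helper presumably does, inferred from the putScalar loops this commit removes; the real org.deeplearning4j.TestUtils implementation may differ in detail.

import java.util.Random;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

// Hypothetical sketch only - NOT the actual TestUtils implementation.
class RandomOneHotSketch {
    static INDArray randomOneHot(int examples, int nOut, Random r) {
        INDArray labels = Nd4j.create(examples, nOut);      // zero-filled label matrix
        for (int i = 0; i < examples; i++) {
            labels.putScalar(i, r.nextInt(nOut), 1.0);       // one random class per row
        }
        return labels;
    }
}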
@@ -853,12 +822,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
    public void testDeconvolution2D() {
        int nOut = 2;

        int[] minibatchSizes = new int[]{1, 3, 3, 1, 3};
        int[] kernelSizes = new int[]{1, 1, 1, 3, 3};
        int[] strides = {1, 1, 2, 2, 2};
        int[] dilation = {1, 2, 1, 2, 2};
        Activation[] activations = new Activation[]{Activation.SIGMOID, Activation.TANH, Activation.SIGMOID, Activation.SIGMOID, Activation.SIGMOID};
        ConvolutionMode[] cModes = new ConvolutionMode[]{Same, Same, Truncate, Truncate, Truncate};
        int width = 7;
        int height = 7;
        int inputDepth = 3;
@@ -888,23 +857,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
                    .updater(new NoOp())
                    .activation(act)
                    .list()
                    .layer(new Deconvolution2D.Builder().name("deconvolution_2D_layer")
                            .kernelSize(k, k)
                            .stride(s, s)
                            .dilation(d, d)
                            .convolutionMode(cm)
                            .nIn(inputDepth).nOut(nOut).build());

            MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX).nOut(nOut).build())
@@ -922,7 +880,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
            System.out.println(msg);

            boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 100);

            assertTrue(msg, gradOK);
@@ -936,16 +894,16 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
        int depthMultiplier = 2;
        int nOut = nIn * depthMultiplier;

        int width = 5;
        int height = 5;

        Nd4j.getRandom().setSeed(12345);

        int[] ks = new int[]{1, 3, 3, 1, 3};
        int[] ss = new int[]{1, 1, 1, 2, 2};
        ConvolutionMode[] cms = new ConvolutionMode[]{
                Truncate, Truncate, Truncate, Truncate, Truncate};
        int[] mb = new int[]{1, 1, 1, 3, 3};

        for( int t=0; t<ks.length; t++ ){
@@ -987,11 +945,11 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
            }

            String msg = " - mb=" + minibatchSize + ", k="
                    + k + ", nIn=" + nIn + ", depthMul=" + depthMultiplier + ", s=" + s + ", cm=" + cm;
            System.out.println(msg);

            boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 256);

            assertTrue(msg, gradOK);
@@ -1004,20 +962,20 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
    public void testSeparableConv2D() {
        int nOut = 2;

        int[] minibatchSizes = new int[]{1, 3};
        int width = 6;
        int height = 6;
        int inputDepth = 3;

        Nd4j.getRandom().setSeed(12345);

        int[] ks = new int[]{1, 3, 3, 1, 3};
        int[] ss = new int[]{1, 1, 1, 2, 2};
        int[] ds = new int[]{1, 1, 2, 2, 2};
        ConvolutionMode[] cms = new ConvolutionMode[]{Truncate, Truncate, Truncate, Truncate, Truncate};
        int[] mb = new int[]{1, 1, 1, 3, 3};

        for (int t = 0; t < ks.length; t++) {

            int k = ks[t];
            int s = ss[t];
@@ -1041,10 +999,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
                    .activation(Activation.TANH)
                    .convolutionMode(cm)
                    .list()
                    .layer(new SeparableConvolution2D.Builder().name("Separable conv 2D layer")
                            .kernelSize(k, k)
                            .stride(s, s)
                            .dilation(d, d)
@@ -1067,7 +1022,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
            System.out.println(msg);

            boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 50);   //Most params are in output layer

            assertTrue(msg, gradOK);
@@ -1079,21 +1034,21 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
    public void testCnnDilated() {
        int nOut = 2;

        int minibatchSize = 2;
        int width = 8;
        int height = 8;
        int inputDepth = 2;

        Nd4j.getRandom().setSeed(12345);

        boolean[] sub = new boolean[]{true, true, false, true, false};
        int[] stride = new int[]{1, 1, 1, 2, 2};
        int[] kernel = new int[]{2, 3, 3, 3, 3};
        int[] ds = new int[]{2, 2, 3, 3, 2};
        ConvolutionMode[] cms = new ConvolutionMode[]{Same, Truncate, Truncate, Same, Truncate};

        for (int t = 0; t < sub.length; t++) {
            boolean subsampling = sub[t];
            int s = stride[t];
@@ -1119,14 +1074,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
                            .kernelSize(k, k)
                            .stride(s, s)
                            .dilation(d, d)
                            .nIn(inputDepth).nOut(2).build());
            if (subsampling) {
                b.layer(new SubsamplingLayer.Builder()
                        .poolingType(SubsamplingLayer.PoolingType.MAX)
                        .kernelSize(k, k)
                        .stride(s, s)
                        .dilation(d, d)
                        .build());
            } else {
@@ -1134,7 +1087,6 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
                        .kernelSize(k, k)
                        .stride(s, s)
                        .dilation(d, d)
                        .build());
            }
@@ -1166,7 +1118,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
    @Test
    public void testCropping2DLayer() {
        Nd4j.getRandom().setSeed(12345);
        int nOut = 2;

        int[] minibatchSizes = {1, 3};
        int width = 12;
@@ -1177,7 +1129,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
        int[] stride = {1, 1};
        int[] padding = {0, 0};
        int[][] cropTestCases = new int[][]{{0, 0, 0, 0}, {1, 1, 0, 0}, {2, 2, 2, 2}, {1, 2, 3, 4}};

        for (int inputDepth : inputDepths) {
            for (int minibatchSize : minibatchSizes) {
@@ -1195,12 +1147,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
                        .convolutionMode(ConvolutionMode.Same)
                        .weightInit(new NormalDistribution(0, 1)).list()
                        .layer(new ConvolutionLayer.Builder(kernel, stride, padding)
                                .nIn(inputDepth).nOut(2).build())   //output: (6-2+0)/1+1 = 5
                        .layer(new Cropping2D(crop))
                        .layer(new ConvolutionLayer.Builder(kernel, stride, padding).nIn(2).nOut(2).build())
                        .layer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG).kernelSize(3, 3).stride(3, 3).build())
                        .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                .activation(Activation.SOFTMAX).nOut(nOut).build())
                        .setInputType(InputType.convolutional(height, width, inputDepth))
                        .build();
@@ -1225,7 +1177,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
                }

                boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 160);

                assertTrue(msg, gradOK);
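Note: the recurring change to the gradient-check calls in this commit is the switch from the all-parameters overload to one that takes four extra arguments. Judging from the call sites, the two nulls are input and label masks, and the trailing "true, N" pair presumably tells GradientCheckUtil to numerically check only a random subset of roughly N parameters, which is where most of the runtime saving comes from. Both call forms, exactly as they appear in this diff:

// Full check - every parameter is perturbed numerically (slow for larger nets):
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

// Subset check used above - no masks, and (presumably) at most ~160 randomly
// chosen parameters are checked per test case:
boolean gradOKSubset = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels,
        null, null, true, 160);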
@@ -369,10 +369,10 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
    public void testLSTM() throws Exception {
        Nd4j.getRandom().setSeed(12345);

        int minibatch = 4;
        int inputSize = 3;
        int lstmLayerSize = 4;
        int timeSeriesLength = 3;
        int nOut = 4;
        INDArray input = Nd4j.rand(new int[] {minibatch, inputSize, timeSeriesLength});
        INDArray labels = Nd4j.zeros(minibatch, nOut, timeSeriesLength);
@@ -417,7 +417,7 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
        }

        boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 32);
        assertTrue(gradOK);
    }
@@ -489,10 +489,7 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
        int width = 8;
        int height = 8;
        int inputDepth = 3;

        Nd4j.getRandom().setSeed(12345);
@@ -502,85 +499,88 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
        Field f2 = org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer.class.getDeclaredField("helper");
        f2.setAccessible(true);

        int[] kernelSizes = new int[]{2, 3, 2};
        int[] strides = {1, 2, 2};
        int[] dilation = {2, 3, 2};
        ConvolutionMode[] cModes = new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same, ConvolutionMode.Truncate};

        for (boolean subsampling : new boolean[]{false, true}) {
            for (int t = 0; t < kernelSizes.length; t++) {
                int k = kernelSizes[t];
                int s = strides[t];
                int d = dilation[t];
                ConvolutionMode cm = cModes[t];

                //Use larger input with larger dilation values (to avoid invalid config)
                int w = d * width;
                int h = d * height;

                INDArray input = Nd4j.rand(minibatchSize, w * h * inputDepth);
                INDArray labels = Nd4j.zeros(minibatchSize, nOut);
                for (int i = 0; i < minibatchSize; i++) {
                    labels.putScalar(new int[]{i, i % nOut}, 1.0);
                }

                NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345)
                        .dataType(DataType.DOUBLE)
                        .updater(new NoOp())
                        .activation(Activation.TANH).convolutionMode(cm).list()
                        .layer(new ConvolutionLayer.Builder().name("layer 0")
                                .kernelSize(k, k)
                                .stride(s, s)
                                .dilation(d, d)
                                .nIn(inputDepth).nOut(2).build());
                if (subsampling) {
                    b.layer(new SubsamplingLayer.Builder()
                            .poolingType(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(k, k)
                            .stride(s, s)
                            .dilation(d, d)
                            .build());
                } else {
                    b.layer(new ConvolutionLayer.Builder().nIn(2).nOut(2)
                            .kernelSize(k, k)
                            .stride(s, s)
                            .dilation(d, d)
                            .build());
                }

                MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nOut(nOut).build())
                        .setInputType(InputType.convolutionalFlat(h, w, inputDepth)).build();

                MultiLayerNetwork net = new MultiLayerNetwork(conf);
                net.init();

                org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c0 =
                        (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) net.getLayer(0);
                ConvolutionHelper ch0 = (ConvolutionHelper) f.get(c0);
                assertTrue(ch0 instanceof CudnnConvolutionHelper);

                if (subsampling) {
                    org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer s1 =
                            (org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer) net.getLayer(1);
                    SubsamplingHelper sh1 = (SubsamplingHelper) f2.get(s1);
                    assertTrue(sh1 instanceof SubsamplingHelper);
                } else {
                    org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c1 =
                            (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) net.getLayer(1);
                    ConvolutionHelper ch1 = (ConvolutionHelper) f.get(c1);
                    assertTrue(ch1 instanceof CudnnConvolutionHelper);
                }

                for (int i = 0; i < net.getLayers().length; i++) {
                    System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams());
                }

                String msg = (subsampling ? "subsampling" : "conv") + " - mb=" + minibatchSize + ", k="
                        + k + ", s=" + s + ", d=" + d + ", cm=" + cm;
                System.out.println(msg);

                boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                assertTrue(msg, gradOK);
            }
        }
    }
@@ -588,7 +588,7 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
    @Test
    public void testDropout() {
        int minibatch = 2;

        for (boolean cnn : new boolean[]{false, true}) {
            Nd4j.getRandom().setSeed(12345);
@@ -605,15 +605,15 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
                    .list();

            if (cnn) {
                builder.layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(2, 2).nOut(2).build());
                builder.layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(2, 2).nOut(2).build());
                builder.setInputType(InputType.convolutional(8, 8, 2));
            } else {
                builder.layer(new DenseLayer.Builder().nOut(8).build());
                builder.layer(new DenseLayer.Builder().nOut(8).build());
                builder.setInputType(InputType.feedForward(6));
            }
            builder.layer(new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build());

            MultiLayerConfiguration conf = builder.build();
            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
@@ -621,11 +621,11 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
            INDArray f;
            if (cnn) {
                f = Nd4j.rand(new int[]{minibatch, 2, 8, 8}).muli(10).subi(5);
            } else {
                f = Nd4j.rand(minibatch, 6).muli(10).subi(5);
            }
            INDArray l = TestUtils.randomOneHot(minibatch, 3);

            mln.output(f, true);
@@ -0,0 +1,140 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.graph;
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TestName;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.profiler.ProfilerConfig;
import java.lang.management.ManagementFactory;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@Slf4j
public class BaseDL4JTest {
@Rule
public TestName name = new TestName();
protected long startTime;
protected int threadCountBefore;
/**
* Override this to set the profiling mode for the tests defined in the child class
*/
public OpExecutioner.ProfilingMode getProfilingMode(){
return OpExecutioner.ProfilingMode.SCOPE_PANIC;
}
/**
* Override this to set the datatype of the tests defined in the child class
*/
public DataType getDataType(){
return DataType.DOUBLE;
}
public DataType getDefaultFPDataType(){
return getDataType();
}
@Before
public void beforeTest(){
log.info("{}.{}", getClass().getSimpleName(), name.getMethodName());
Nd4j.getExecutioner().setProfilingMode(getProfilingMode());
Nd4j.getExecutioner().setProfilingConfig(ProfilerConfig.builder().build());
Nd4j.setDefaultDataTypes(getDataType(), getDefaultFPDataType());
startTime = System.currentTimeMillis();
threadCountBefore = ManagementFactory.getThreadMXBean().getThreadCount();
}
@After
public void afterTest(){
//Attempt to keep workspaces isolated between tests
Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
MemoryWorkspace currWS = Nd4j.getMemoryManager().getCurrentWorkspace();
Nd4j.getMemoryManager().setCurrentWorkspace(null);
if(currWS != null){
//Not really safe to continue testing under this situation... other tests will likely fail with obscure
// errors that are hard to track back to this
log.error("Open workspace leaked from test! Exiting - {}, isOpen = {} - {}", currWS.getId(), currWS.isScopeActive(), currWS);
System.exit(1);
}
StringBuilder sb = new StringBuilder();
long maxPhys = Pointer.maxPhysicalBytes();
long maxBytes = Pointer.maxBytes();
long currPhys = Pointer.physicalBytes();
long currBytes = Pointer.totalBytes();
long jvmTotal = Runtime.getRuntime().totalMemory();
long jvmMax = Runtime.getRuntime().maxMemory();
int threadsAfter = ManagementFactory.getThreadMXBean().getThreadCount();
long duration = System.currentTimeMillis() - startTime;
sb.append(getClass().getSimpleName()).append(".").append(name.getMethodName())
.append(": ").append(duration).append(" ms")
.append(", threadCount: (").append(threadCountBefore).append("->").append(threadsAfter).append(")")
.append(", jvmTotal=").append(jvmTotal)
.append(", jvmMax=").append(jvmMax)
.append(", totalBytes=").append(currBytes).append(", maxBytes=").append(maxBytes)
.append(", currPhys=").append(currPhys).append(", maxPhys=").append(maxPhys);
List<MemoryWorkspace> ws = Nd4j.getWorkspaceManager().getAllWorkspacesForCurrentThread();
if(ws != null && ws.size() > 0){
long currSize = 0;
for(MemoryWorkspace w : ws){
currSize += w.getCurrentSize();
}
if(currSize > 0){
sb.append(", threadWSSize=").append(currSize)
.append(" (").append(ws.size()).append(" WSs)");
}
}
Properties p = Nd4j.getExecutioner().getEnvironmentInformation();
Object o = p.get("cuda.devicesInformation");
if(o instanceof List){
List<Map<String,Object>> l = (List<Map<String, Object>>) o;
if(l.size() > 0) {
sb.append(" [").append(l.size())
.append(" GPUs: ");
for (int i = 0; i < l.size(); i++) {
Map<String,Object> m = l.get(i);
if(i > 0)
sb.append(",");
sb.append("(").append(m.get("cuda.freeMemory")).append(" free, ")
.append(m.get("cuda.totalMemory")).append(" total)");
}
sb.append("]");
}
}
log.info(sb.toString());
}
}
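Tests pick this up simply by extending the class; nothing in the test body needs to change. A hypothetical example follows (the test class and assertion are illustrative only, not part of this commit):

package org.deeplearning4j.graph;

import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

import static org.junit.Assert.assertEquals;

// Illustrative only: extending BaseDL4JTest gives this test SCOPE_PANIC profiling,
// double-precision defaults, workspace cleanup, and the per-test runtime/memory log above.
public class ExampleGraphTest extends BaseDL4JTest {

    @Test(timeout = 10000L)
    public void testSomethingSmall() {
        INDArray arr = Nd4j.create(new double[]{1, 2, 3, 4});
        assertEquals(4, arr.length());
    }
}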
@@ -17,6 +17,7 @@
package org.deeplearning4j.graph.data;

import org.apache.commons.lang3.ArrayUtils;
import org.deeplearning4j.graph.BaseDL4JTest;
import org.deeplearning4j.graph.api.Edge;
import org.deeplearning4j.graph.api.IGraph;
import org.deeplearning4j.graph.data.impl.DelimitedEdgeLineProcessor;
@@ -32,7 +33,7 @@ import java.util.List;
import static org.junit.Assert.*;

public class TestGraphLoading extends BaseDL4JTest {

    @Test(timeout = 10000L)
    public void testEdgeListGraphLoading() throws IOException {
@@ -17,6 +17,7 @@
package org.deeplearning4j.graph.data;

import org.apache.commons.lang3.ArrayUtils;
import org.deeplearning4j.graph.BaseDL4JTest;
import org.deeplearning4j.graph.api.Edge;
import org.deeplearning4j.graph.api.IGraph;
import org.deeplearning4j.graph.data.impl.WeightedEdgeLineProcessor;
@@ -32,7 +33,7 @@ import java.util.List;
import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.assertEquals;

public class TestGraphLoadingWeighted extends BaseDL4JTest {

    @Test(timeout = 10000L)
    public void testWeightedDirected() throws IOException {
@@ -17,6 +17,7 @@
package org.deeplearning4j.graph.graph;

import org.apache.commons.lang3.ArrayUtils;
import org.deeplearning4j.graph.BaseDL4JTest;
import org.deeplearning4j.graph.api.*;
import org.deeplearning4j.graph.data.GraphLoader;
import org.deeplearning4j.graph.iterator.RandomWalkIterator;
@@ -34,7 +35,7 @@ import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.*;

public class TestGraph extends BaseDL4JTest {

    @Test(timeout = 10000L)
    public void testSimpleGraph() {
@@ -16,6 +16,7 @@
package org.deeplearning4j.graph.models.deepwalk;

import org.deeplearning4j.graph.BaseDL4JTest;
import org.deeplearning4j.graph.data.GraphLoader;
import org.deeplearning4j.graph.graph.Graph;
import org.deeplearning4j.graph.iterator.GraphWalkIterator;
@@ -35,7 +36,7 @@ import java.util.Arrays;
import static org.junit.Assert.*;

public class DeepWalkGradientCheck extends BaseDL4JTest {

    public static final double epsilon = 1e-8;
    public static final double MAX_REL_ERROR = 1e-3;
@@ -17,6 +17,7 @@
package org.deeplearning4j.graph.models.deepwalk;

import org.apache.commons.io.FilenameUtils;
import org.deeplearning4j.graph.BaseDL4JTest;
import org.deeplearning4j.graph.api.Edge;
import org.deeplearning4j.graph.api.IGraph;
import org.deeplearning4j.graph.data.GraphLoader;
@@ -42,7 +43,7 @@ import java.util.Random;
import static org.junit.Assert.*;

public class TestDeepWalk extends BaseDL4JTest {

    @Rule
    public TemporaryFolder testDir = new TemporaryFolder();
@@ -214,7 +215,7 @@ public class TestDeepWalk extends BaseDL4JTest {
        Nd4j.getRandom().setSeed(12345);

        int nEpochs = 5;

        //Set up network
        DeepWalk<String, String> deepWalk =
@@ -16,6 +16,7 @@
package org.deeplearning4j.graph.models.deepwalk;

import org.deeplearning4j.graph.BaseDL4JTest;
import org.junit.Test;

import java.util.Arrays;
@@ -24,7 +25,7 @@ import java.util.Set;
import static org.junit.Assert.*;

public class TestGraphHuffman extends BaseDL4JTest {

    @Test(timeout = 10000L)
    public void testGraphHuffman() {
@@ -0,0 +1,140 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.nn.modelimport.keras;
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TestName;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.profiler.ProfilerConfig;
import java.lang.management.ManagementFactory;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@Slf4j
public class BaseDL4JTest {
@Rule
public TestName name = new TestName();
protected long startTime;
protected int threadCountBefore;
/**
* Override this to set the profiling mode for the tests defined in the child class
*/
public OpExecutioner.ProfilingMode getProfilingMode(){
return OpExecutioner.ProfilingMode.SCOPE_PANIC;
}
/**
* Override this to set the datatype of the tests defined in the child class
*/
public DataType getDataType(){
return DataType.DOUBLE;
}
public DataType getDefaultFPDataType(){
return getDataType();
}
@Before
public void beforeTest(){
log.info("{}.{}", getClass().getSimpleName(), name.getMethodName());
Nd4j.getExecutioner().setProfilingMode(getProfilingMode());
Nd4j.getExecutioner().setProfilingConfig(ProfilerConfig.builder().build());
Nd4j.setDefaultDataTypes(getDataType(), getDefaultFPDataType());
startTime = System.currentTimeMillis();
threadCountBefore = ManagementFactory.getThreadMXBean().getThreadCount();
}
@After
public void afterTest(){
//Attempt to keep workspaces isolated between tests
Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
MemoryWorkspace currWS = Nd4j.getMemoryManager().getCurrentWorkspace();
Nd4j.getMemoryManager().setCurrentWorkspace(null);
if(currWS != null){
//Not really safe to continue testing under this situation... other tests will likely fail with obscure
// errors that are hard to track back to this
log.error("Open workspace leaked from test! Exiting - {}, isOpen = {} - {}", currWS.getId(), currWS.isScopeActive(), currWS);
System.exit(1);
}
StringBuilder sb = new StringBuilder();
long maxPhys = Pointer.maxPhysicalBytes();
long maxBytes = Pointer.maxBytes();
long currPhys = Pointer.physicalBytes();
long currBytes = Pointer.totalBytes();
long jvmTotal = Runtime.getRuntime().totalMemory();
long jvmMax = Runtime.getRuntime().maxMemory();
int threadsAfter = ManagementFactory.getThreadMXBean().getThreadCount();
long duration = System.currentTimeMillis() - startTime;
sb.append(getClass().getSimpleName()).append(".").append(name.getMethodName())
.append(": ").append(duration).append(" ms")
.append(", threadCount: (").append(threadCountBefore).append("->").append(threadsAfter).append(")")
.append(", jvmTotal=").append(jvmTotal)
.append(", jvmMax=").append(jvmMax)
.append(", totalBytes=").append(currBytes).append(", maxBytes=").append(maxBytes)
.append(", currPhys=").append(currPhys).append(", maxPhys=").append(maxPhys);
List<MemoryWorkspace> ws = Nd4j.getWorkspaceManager().getAllWorkspacesForCurrentThread();
if(ws != null && ws.size() > 0){
long currSize = 0;
for(MemoryWorkspace w : ws){
currSize += w.getCurrentSize();
}
if(currSize > 0){
sb.append(", threadWSSize=").append(currSize)
.append(" (").append(ws.size()).append(" WSs)");
}
}
Properties p = Nd4j.getExecutioner().getEnvironmentInformation();
Object o = p.get("cuda.devicesInformation");
if(o instanceof List){
List<Map<String,Object>> l = (List<Map<String, Object>>) o;
if(l.size() > 0) {
sb.append(" [").append(l.size())
.append(" GPUs: ");
for (int i = 0; i < l.size(); i++) {
Map<String,Object> m = l.get(i);
if(i > 0)
sb.append(",");
sb.append("(").append(m.get("cuda.freeMemory")).append(" free, ")
.append(m.get("cuda.totalMemory")).append(" total)");
}
sb.append("]");
}
}
log.info(sb.toString());
}
}
@@ -38,7 +38,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import static org.junit.Assert.*;

public class MiscTests extends BaseDL4JTest {

    @Rule
    public TemporaryFolder testDir = new TemporaryFolder();
@@ -24,6 +24,7 @@ import org.deeplearning4j.datasets.datavec.SequenceRecordReaderDataSetIterator;
import org.deeplearning4j.nn.layers.recurrent.LSTM;
import org.deeplearning4j.nn.layers.recurrent.LastTimeStepLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
@@ -54,7 +55,7 @@ import java.util.List;
import static junit.framework.TestCase.assertTrue;

@Ignore("AB - 2019/05/27 - NPE on CUDA only. Ignored to get all passing baseline on master; see issue 7657")
public class FullModelComparisons extends BaseDL4JTest {

    ClassLoader classLoader = FullModelComparisons.class.getClassLoader();
@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.configurations;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.KerasFlattenRnnPreprocessor;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.PermutePreprocessor;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.ReshapePreprocessor;
@@ -26,7 +27,7 @@ import org.junit.Test;
import static org.junit.Assert.assertEquals;

public class JsonTest extends BaseDL4JTest {

    @Test
    public void testJsonPreprocessors() throws Exception {
@@ -20,6 +20,7 @@ import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test;
@@ -36,7 +37,7 @@ import java.io.InputStream;
 */
@Slf4j
public class Keras1ModelConfigurationTest extends BaseDL4JTest {

    private ClassLoader classLoader = getClass().getClassLoader();
@@ -21,6 +21,7 @@ import lombok.val;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
@@ -49,7 +50,7 @@ import static org.junit.Assert.assertArrayEquals;
 */
@Slf4j
public class Keras2ModelConfigurationTest extends BaseDL4JTest {

    ClassLoader classLoader = getClass().getClassLoader();
@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.configurations;
import org.deeplearning4j.nn.conf.distribution.*;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -31,7 +32,7 @@ import java.util.Map;
import static org.junit.Assert.assertEquals;

public class KerasInitilizationTest extends BaseDL4JTest {
    private double minValue = -0.2;
    private double maxValue = 0.2;
@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.configurations;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@@ -38,10 +39,7 @@ import static org.junit.Assert.assertNotNull;
 * Test import of Keras models.
 */
@Slf4j
public class KerasModelImportTest extends BaseDL4JTest {

    @Test
    public void testH5WithoutTensorflowScope() throws Exception {
@@ -20,6 +20,7 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.deeplearning4j.common.resources.DL4JResources;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
import org.deeplearning4j.nn.modelimport.keras.layers.custom.KerasLRN;
@@ -41,7 +42,7 @@ import java.net.URL;
 * @author Justin Long (crockpotveggies)
 */
@Slf4j
public class KerasCustomLayerTest extends BaseDL4JTest {

    @Rule
    public TemporaryFolder testDir = new TemporaryFolder();
@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.e2e;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLambdaLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel;
@@ -44,7 +45,7 @@ import java.nio.file.StandardCopyOption;
 *
 * @author Max Pumperla
 */
public class KerasLambdaTest extends BaseDL4JTest {

    @Rule
    public TemporaryFolder testDir = new TemporaryFolder();
@@ -32,10 +32,7 @@ import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.layers.recurrent.LSTM;
import org.deeplearning4j.nn.layers.recurrent.LastTimeStepLayer;
import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer;
import org.deeplearning4j.nn.modelimport.keras.*;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelBuilder;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@@ -78,7 +75,7 @@ import static org.junit.Assert.assertTrue;
 * @author dave@skymind.io, Max Pumperla
 */
@Slf4j
public class KerasModelEndToEndTest extends BaseDL4JTest {
    private static final String GROUP_ATTR_INPUTS = "inputs";
    private static final String GROUP_ATTR_OUTPUTS = "outputs";
    private static final String GROUP_PREDICTIONS = "predictions";
@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.e2e;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
@@ -50,7 +51,7 @@ import java.nio.file.StandardCopyOption;
 * @author Max Pumperla
 */
@Slf4j
public class KerasYolo9000PredictTest extends BaseDL4JTest {
    private static final String DL4J_MODEL_FILE_NAME = ".";
    private static ImagePreProcessingScaler IMAGE_PREPROCESSING_SCALER = new ImagePreProcessingScaler(0, 1);
@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.e2e;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasSpaceToDepth;
@@ -47,7 +48,7 @@ import java.nio.file.StandardCopyOption;
 * @author Max Pumperla
 */
@Slf4j
public class KerasYolo9000Test extends BaseDL4JTest {
    private static final String TEMP_MODEL_FILENAME = "tempModel";
    private static final String H5_EXTENSION = ".h5";
@@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.advanced.activation;

import org.deeplearning4j.nn.conf.layers.ActivationLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
/**
 * @author Max Pumperla
 */
public class KerasLeakyReLUTest extends BaseDL4JTest {
    private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
    private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();
@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.advanced.activation;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.PReLULayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -35,7 +36,7 @@ import static org.junit.Assert.assertEquals;
/**
 * @author Max Pumperla
 */
public class KerasPReLUTest extends BaseDL4JTest {
    private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
    private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();
@@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.advanced.activation;

import org.deeplearning4j.nn.conf.layers.ActivationLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
/**
 * @author Max Pumperla
 */
public class KerasThresholdedReLUTest extends BaseDL4JTest {
    private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
    private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();
@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.Convolution1DLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -40,7 +41,7 @@ import static org.junit.Assert.assertNotNull;
/**
 * @author Max Pumperla
 */
public class KerasAtrousConvolution1DTest extends BaseDL4JTest {
    private final String ACTIVATION_KERAS = "linear";
    private final String ACTIVATION_DL4J = "identity";
@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -38,7 +39,7 @@ import static org.junit.Assert.assertEquals;
/**
 * @author Max Pumperla
 */
public class KerasAtrousConvolution2DTest extends BaseDL4JTest {
    private final String ACTIVATION_KERAS = "linear";
    private final String ACTIVATION_DL4J = "identity";
View File

@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.layers.Convolution1DLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -37,7 +38,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasConvolution1DTest {
+public class KerasConvolution1DTest extends BaseDL4JTest {
 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";

View File

@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -39,7 +40,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasConvolution2DTest {
+public class KerasConvolution2DTest extends BaseDL4JTest {
 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";

View File

@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -43,7 +44,7 @@ import static org.junit.Assert.assertNotNull;
 /**
  * @author Max Pumperla
 */
-public class KerasConvolution3DTest {
+public class KerasConvolution3DTest extends BaseDL4JTest {
 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -32,7 +33,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasCropping1DTest {
+public class KerasCropping1DTest extends BaseDL4JTest {
 private final String LAYER_NAME = "cropping_1D_layer";
 private final int CROPPING = 2;

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.layers.convolutional.Cropping2D;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -32,7 +33,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasCropping2DTest {
+public class KerasCropping2DTest extends BaseDL4JTest {
 private final String LAYER_NAME = "cropping_2D_layer";
 private final int[] CROPPING = new int[]{2, 3};

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.layers.convolutional.Cropping2D;
 import org.deeplearning4j.nn.conf.layers.convolutional.Cropping3D;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -34,7 +35,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasCropping3DTest {
+public class KerasCropping3DTest extends BaseDL4JTest {
 private final String LAYER_NAME = "cropping_3D_layer";
 private final int[] CROPPING = new int[]{2, 3, 5};

View File

@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.layers.Deconvolution2D;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -39,7 +40,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasDeconvolution2DTest {
+public class KerasDeconvolution2DTest extends BaseDL4JTest {
 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";

View File

@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.layers.DepthwiseConvolution2D;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -42,7 +43,7 @@ import static org.junit.Assert.assertNotNull;
 /**
  * @author Max Pumperla
 */
-public class KerasDepthwiseConvolution2DTest {
+public class KerasDepthwiseConvolution2DTest extends BaseDL4JTest {
 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";

View File

@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.layers.SeparableConvolution2D;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -39,7 +40,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasSeparableConvolution2DTest {
+public class KerasSeparableConvolution2DTest extends BaseDL4JTest {
 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.layers.Upsampling1D;
 import org.deeplearning4j.nn.conf.layers.Upsampling2D;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -35,7 +36,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasUpsampling1DTest {
+public class KerasUpsampling1DTest extends BaseDL4JTest {
 private final String LAYER_NAME = "upsampling_1D_layer";
 private int size = 4;

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.layers.Upsampling2D;
 import org.deeplearning4j.nn.conf.layers.ZeroPadding1DLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -35,7 +36,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasUpsampling2DTest {
+public class KerasUpsampling2DTest extends BaseDL4JTest {
 private final String LAYER_NAME = "upsampling_2D_layer";
 private int[] size = new int[]{2, 2};

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.layers.Upsampling3D;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -33,7 +34,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasUpsampling3DTest {
+public class KerasUpsampling3DTest extends BaseDL4JTest {
 private final String LAYER_NAME = "upsampling_3D_layer";
 private int[] size = new int[]{2, 2, 2};

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.layers.ZeroPadding1DLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasZeroPadding1DTest {
+public class KerasZeroPadding1DTest extends BaseDL4JTest {
 private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
 private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.layers.ZeroPaddingLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -32,7 +33,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasZeroPadding2DTest {
+public class KerasZeroPadding2DTest extends BaseDL4JTest {
 private final String LAYER_NAME = "zero_padding_2D_layer";
 private final int[] ZERO_PADDING = new int[]{2, 3};

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
 import org.deeplearning4j.nn.conf.layers.ZeroPadding3DLayer;
 import org.deeplearning4j.nn.conf.layers.ZeroPaddingLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -34,7 +35,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasZeroPadding3DTest {
+public class KerasZeroPadding3DTest extends BaseDL4JTest {
 private final String LAYER_NAME = "zero_padding_3D_layer";
 private final int[] ZERO_PADDING = new int[]{2, 3, 4};

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.core;
 import org.deeplearning4j.nn.conf.layers.ActivationLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -27,7 +28,7 @@ import java.util.Map;
 import static org.junit.Assert.assertEquals;
-public class KerasActivationLayer {
+public class KerasActivationLayer extends BaseDL4JTest {
 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.core;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.layers.DenseLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -39,7 +40,7 @@ import static org.junit.Assert.assertNotNull;
 /**
  * @author Max Pumperla
 */
-public class KerasDenseTest {
+public class KerasDenseTest extends BaseDL4JTest {
 private Integer keras1 = 1;
 private Integer keras2 = 2;

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.core;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.layers.DropoutLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasDropoutTest {
+public class KerasDropoutTest extends BaseDL4JTest {
 String LAYER_NAME = "dropout";
 private final double DROPOUT_KERAS = 0.3;

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.core;
 import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasMaskingTest {
+public class KerasMaskingTest extends BaseDL4JTest {
 private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.core;
 import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -33,7 +34,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasPermuteTest {
+public class KerasPermuteTest extends BaseDL4JTest {
 private Integer keras1 = 1;
 private Integer keras2 = 2;

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.core;
 import org.deeplearning4j.nn.conf.layers.misc.RepeatVector;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -30,7 +31,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasRepeatVectorTest {
+public class KerasRepeatVectorTest extends BaseDL4JTest {
 String LAYER_NAME = "repeat";
 private int REPEAT = 4;

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.core;
 import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -36,7 +37,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasReshapeTest {
+public class KerasReshapeTest extends BaseDL4JTest {
 private Integer keras1 = 1;
 private Integer keras2 = 2;

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.core;
 import org.deeplearning4j.nn.conf.dropout.SpatialDropout;
 import org.deeplearning4j.nn.conf.layers.DropoutLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasSpatialDropout2DTest {
+public class KerasSpatialDropout2DTest extends BaseDL4JTest {
 String LAYER_NAME = "spatial_dropout_2d";
 private final double RATE_KERAS = 0.3;

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.embeddings;
 import org.deeplearning4j.nn.conf.layers.EmbeddingSequenceLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -36,7 +37,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasEmbeddingTest {
+public class KerasEmbeddingTest extends BaseDL4JTest {
 private final String LAYER_NAME = "embedding_sequence_layer";
 private final String INIT_KERAS = "glorot_normal";

View File

@@ -21,6 +21,7 @@ import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.LocallyConnected1D;
 import org.deeplearning4j.nn.conf.layers.LocallyConnected2D;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -39,7 +40,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasLocallyConnected1DTest {
+public class KerasLocallyConnected1DTest extends BaseDL4JTest {
 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";

View File

@@ -20,6 +20,7 @@ import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.LocallyConnected2D;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -39,7 +40,7 @@ import static org.junit.Assert.assertNotNull;
 /**
  * @author Max Pumperla
 */
-public class KerasLocallyConnected2DTest {
+public class KerasLocallyConnected2DTest extends BaseDL4JTest {
 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.noise;
 import org.deeplearning4j.nn.conf.dropout.AlphaDropout;
 import org.deeplearning4j.nn.conf.layers.DropoutLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasAlphaDropoutTest {
+public class KerasAlphaDropoutTest extends BaseDL4JTest {
 String LAYER_NAME = "alpha_dropout";
 private final double RATE_KERAS = 0.3;

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.noise;
 import org.deeplearning4j.nn.conf.dropout.GaussianDropout;
 import org.deeplearning4j.nn.conf.layers.DropoutLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasGaussianDropoutTest {
+public class KerasGaussianDropoutTest extends BaseDL4JTest {
 String LAYER_NAME = "gaussian_dropout";
 private final double RATE_KERAS = 0.3;

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.noise;
 import org.deeplearning4j.nn.conf.dropout.GaussianNoise;
 import org.deeplearning4j.nn.conf.layers.DropoutLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasGaussianNoiseTest {
+public class KerasGaussianNoiseTest extends BaseDL4JTest {
 String LAYER_NAME = "gaussian_noise";
 private final double STDDEV = 0.3;

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.normalization;
 import org.deeplearning4j.nn.conf.layers.BatchNormalization;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -32,7 +33,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasBatchNormalizationTest {
+public class KerasBatchNormalizationTest extends BaseDL4JTest {
 public static final String PARAM_NAME_BETA = "beta";
 private final String LAYER_NAME = "batch_norm_layer";

View File

@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.pooling;
 import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.layers.PoolingType;
 import org.deeplearning4j.nn.conf.layers.Subsampling1DLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -33,7 +34,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasPooling1DTest {
+public class KerasPooling1DTest extends BaseDL4JTest {
 private final String LAYER_NAME = "test_layer";
 private final int[] KERNEL_SIZE = new int[]{2};

View File

@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.pooling;
 import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.layers.PoolingType;
 import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -35,7 +36,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasPooling2DTest {
+public class KerasPooling2DTest extends BaseDL4JTest {
 private final String LAYER_NAME = "test_layer";
 private final int[] KERNEL_SIZE = new int[]{1, 2};

View File

@@ -20,6 +20,7 @@ import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.layers.PoolingType;
 import org.deeplearning4j.nn.conf.layers.Subsampling3DLayer;
 import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -36,7 +37,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasPooling3DTest {
+public class KerasPooling3DTest extends BaseDL4JTest {
 private final String LAYER_NAME = "pooling_3d";
 private final int[] KERNEL_SIZE = new int[]{2, 2, 2};

View File

@@ -21,6 +21,7 @@ import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.LSTM;
 import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep;
 import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -44,7 +45,7 @@ import static org.junit.Assert.assertNotNull;
 /**
  * @author Max Pumperla
 */
-public class KerasLSTMTest {
+public class KerasLSTMTest extends BaseDL4JTest {
 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";

View File

@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.recurrent;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep;
 import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -35,7 +36,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasSimpleRnnTest {
+public class KerasSimpleRnnTest extends BaseDL4JTest {
 private final String ACTIVATION = "sigmoid";
 private final String LAYER_NAME = "simple_rnn_layer";

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.wrappers;
 import org.deeplearning4j.nn.conf.layers.LSTM;
 import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@@ -33,7 +34,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * @author Max Pumperla
 */
-public class KerasBidirectionalTest {
+public class KerasBidirectionalTest extends BaseDL4JTest {
 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.nn.modelimport.keras.optimizers;
 import org.deeplearning4j.config.DL4JSystemProperties;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasModel;
 import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel;
 import org.deeplearning4j.nn.modelimport.keras.e2e.KerasModelEndToEndTest;
@@ -33,7 +34,7 @@ import java.nio.file.StandardCopyOption;
 import static java.io.File.createTempFile;
-public class OptimizerImport {
+public class OptimizerImport extends BaseDL4JTest {
 @Test
 public void importAdam() throws Exception {

View File

@@ -16,6 +16,7 @@
 package org.deeplearning4j.nn.modelimport.keras.preprocessing.sequence;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
 import org.deeplearning4j.nn.modelimport.keras.preprocessing.text.KerasTokenizer;
 import org.junit.Test;
@@ -29,7 +30,7 @@ import java.io.IOException;
 *
 * @author Max Pumperla
 */
-public class TimeSeriesGeneratorImportTest {
+public class TimeSeriesGeneratorImportTest extends BaseDL4JTest {
 @Test
 public void importTimeSeriesTest() throws IOException, InvalidKerasConfigurationException {

View File

@@ -16,6 +16,7 @@
 package org.deeplearning4j.nn.modelimport.keras.preprocessing.sequence;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
 import org.junit.Test;
 import org.nd4j.linalg.api.ndarray.INDArray;
@@ -24,7 +25,7 @@ import org.nd4j.linalg.primitives.Pair;
 import static org.junit.Assert.assertEquals;
-public class TimeSeriesGeneratorTest {
+public class TimeSeriesGeneratorTest extends BaseDL4JTest {
 @Test
 public void tsGeneratorTest() throws InvalidKerasConfigurationException {

View File

@@ -16,6 +16,7 @@
 package org.deeplearning4j.nn.modelimport.keras.preprocessing.text;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
 import org.junit.Test;
 import org.nd4j.linalg.io.ClassPathResource;
@@ -33,7 +34,7 @@ import static org.junit.Assert.assertTrue;
 *
 * @author Max Pumperla
 */
-public class TokenizerImportTest {
+public class TokenizerImportTest extends BaseDL4JTest {
 ClassLoader classLoader = getClass().getClassLoader();

View File

@@ -16,6 +16,7 @@
 package org.deeplearning4j.nn.modelimport.keras.preprocessing.text;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.junit.Test;
 import org.nd4j.linalg.api.ndarray.INDArray;
@@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
 *
 * @author Max Pumperla
 */
-public class TokenizerTest {
+public class TokenizerTest extends BaseDL4JTest {
 @Test
 public void tokenizerBasics() {

View File

@@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.weights;
 import lombok.extern.slf4j.Slf4j;
 import lombok.val;
 import org.deeplearning4j.nn.graph.ComputationGraph;
+import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
 import org.deeplearning4j.nn.modelimport.keras.KerasModel;
 import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasSpaceToDepth;
@@ -42,7 +43,7 @@ import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 @Slf4j
-public class KerasWeightSettingTests {
+public class KerasWeightSettingTests extends BaseDL4JTest {
 @Rule
 public final TemporaryFolder testDir = new TemporaryFolder();

View File

@ -0,0 +1,140 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.nearestneighbor.server;
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TestName;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.profiler.ProfilerConfig;
import java.lang.management.ManagementFactory;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@Slf4j
public class BaseDL4JTest {
    @Rule
    public TestName name = new TestName();

    protected long startTime;
    protected int threadCountBefore;

    /**
     * Override this to set the profiling mode for the tests defined in the child class
     */
    public OpExecutioner.ProfilingMode getProfilingMode(){
        return OpExecutioner.ProfilingMode.SCOPE_PANIC;
    }

    /**
     * Override this to set the datatype of the tests defined in the child class
     */
    public DataType getDataType(){
        return DataType.DOUBLE;
    }

    public DataType getDefaultFPDataType(){
        return getDataType();
    }

    @Before
    public void beforeTest(){
        log.info("{}.{}", getClass().getSimpleName(), name.getMethodName());
        Nd4j.getExecutioner().setProfilingMode(getProfilingMode());
        Nd4j.getExecutioner().setProfilingConfig(ProfilerConfig.builder().build());
        Nd4j.setDefaultDataTypes(getDataType(), getDefaultFPDataType());
        startTime = System.currentTimeMillis();
        threadCountBefore = ManagementFactory.getThreadMXBean().getThreadCount();
    }

    @After
    public void afterTest(){
        //Attempt to keep workspaces isolated between tests
        Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
        MemoryWorkspace currWS = Nd4j.getMemoryManager().getCurrentWorkspace();
        Nd4j.getMemoryManager().setCurrentWorkspace(null);
        if(currWS != null){
            //Not really safe to continue testing under this situation... other tests will likely fail with obscure
            // errors that are hard to track back to this
            log.error("Open workspace leaked from test! Exiting - {}, isOpen = {} - {}", currWS.getId(), currWS.isScopeActive(), currWS);
            System.exit(1);
        }

        StringBuilder sb = new StringBuilder();
        long maxPhys = Pointer.maxPhysicalBytes();
        long maxBytes = Pointer.maxBytes();
        long currPhys = Pointer.physicalBytes();
        long currBytes = Pointer.totalBytes();
        long jvmTotal = Runtime.getRuntime().totalMemory();
        long jvmMax = Runtime.getRuntime().maxMemory();
        int threadsAfter = ManagementFactory.getThreadMXBean().getThreadCount();
        long duration = System.currentTimeMillis() - startTime;
        sb.append(getClass().getSimpleName()).append(".").append(name.getMethodName())
                .append(": ").append(duration).append(" ms")
                .append(", threadCount: (").append(threadCountBefore).append("->").append(threadsAfter).append(")")
                .append(", jvmTotal=").append(jvmTotal)
                .append(", jvmMax=").append(jvmMax)
                .append(", totalBytes=").append(currBytes).append(", maxBytes=").append(maxBytes)
                .append(", currPhys=").append(currPhys).append(", maxPhys=").append(maxPhys);

        List<MemoryWorkspace> ws = Nd4j.getWorkspaceManager().getAllWorkspacesForCurrentThread();
        if(ws != null && ws.size() > 0){
            long currSize = 0;
            for(MemoryWorkspace w : ws){
                currSize += w.getCurrentSize();
            }
            if(currSize > 0){
                sb.append(", threadWSSize=").append(currSize)
                        .append(" (").append(ws.size()).append(" WSs)");
            }
        }

        Properties p = Nd4j.getExecutioner().getEnvironmentInformation();
        Object o = p.get("cuda.devicesInformation");
        if(o instanceof List){
            List<Map<String,Object>> l = (List<Map<String, Object>>) o;
            if(l.size() > 0) {
                sb.append(" [").append(l.size())
                        .append(" GPUs: ");
                for (int i = 0; i < l.size(); i++) {
                    Map<String,Object> m = l.get(i);
                    if(i > 0)
                        sb.append(",");
                    sb.append("(").append(m.get("cuda.freeMemory")).append(" free, ")
                            .append(m.get("cuda.totalMemory")).append(" total)");
                }
                sb.append("]");
            }
        }
        log.info(sb.toString());
    }
}
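For illustration only (not part of this commit): a minimal sketch of how a test class is expected to consume the base class above — extend it and override the hooks where the defaults (SCOPE_PANIC profiling, DOUBLE data type) are not wanted. The class name ExampleDL4JTest is hypothetical and is assumed to live in the same package as the BaseDL4JTest shown above.

import org.junit.Test;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.factory.Nd4j;
import static org.junit.Assert.assertEquals;

// Hypothetical subclass, shown only to illustrate how the hooks are intended to be used
public class ExampleDL4JTest extends BaseDL4JTest {

    // Run this class in FLOAT instead of the DOUBLE default applied by beforeTest()
    @Override
    public DataType getDataType() {
        return DataType.FLOAT;
    }

    // Disable profiling for this class if the SCOPE_PANIC checks are not wanted here
    @Override
    public OpExecutioner.ProfilingMode getProfilingMode() {
        return OpExecutioner.ProfilingMode.DISABLED;
    }

    @Test
    public void testDefaultDataTypeApplied() {
        // beforeTest() has already called Nd4j.setDefaultDataTypes(...), so new arrays default to FLOAT
        assertEquals(DataType.FLOAT, Nd4j.create(2, 2).dataType());
    }
}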

View File

@@ -44,7 +44,7 @@ import static org.junit.Assert.assertEquals;
 /**
  * Created by agibsonccc on 4/27/17.
  */
-public class NearestNeighborTest {
+public class NearestNeighborTest extends BaseDL4JTest {
     @Rule
     public TemporaryFolder testDir = new TemporaryFolder();

View File

@@ -175,7 +175,7 @@ public class KDTree implements Serializable {
            return Pair.of(Double.POSITIVE_INFINITY, null);
        int _discNext = (_disc + 1) % dims;
-       double dist2 = Nd4j.getExecutioner().execAndReturn(new EuclideanDistance(point, Nd4j.zeros(point.shape()))).getFinalResult().doubleValue();
+       double dist2 = Nd4j.getExecutioner().execAndReturn(new EuclideanDistance(point, Nd4j.zeros(point.dataType(), point.shape()))).getFinalResult().doubleValue();
        if (dist2 < dist) {
            best = node.getPoint();
            dist = dist2;
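Side note on the KDTree hunk above: with BaseDL4JTest now forcing DOUBLE as the default data type, a zero vector created without an explicit type can end up with a different type than the query point. The standalone sketch below illustrates the difference; the class name is hypothetical and the import paths are assumed for the ND4J version in use.

import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.reduce3.EuclideanDistance;
import org.nd4j.linalg.factory.Nd4j;

public class ZerosDataTypeSketch {
    public static void main(String[] args) {
        // A FLOAT point, while the global default (as set by BaseDL4JTest) is DOUBLE
        INDArray point = Nd4j.rand(DataType.FLOAT, new int[]{1, 3});

        // Old form: zeros take the global default type, which may not match the point's type
        INDArray zerosDefault = Nd4j.zeros(point.shape());

        // New form: zeros explicitly inherit the point's type, so the distance op sees matching types
        INDArray zerosTyped = Nd4j.zeros(point.dataType(), point.shape());

        double dist = Nd4j.getExecutioner()
                .execAndReturn(new EuclideanDistance(point, zerosTyped))
                .getFinalResult().doubleValue();

        System.out.println("norm of point = " + dist);
        System.out.println("default-typed zeros dtype = " + zerosDefault.dataType());
    }
}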

View File

@@ -0,0 +1,140 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.clustering;
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TestName;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.profiler.ProfilerConfig;
import java.lang.management.ManagementFactory;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@Slf4j
public class BaseDL4JTest {
    @Rule
    public TestName name = new TestName();

    protected long startTime;
    protected int threadCountBefore;

    /**
     * Override this to set the profiling mode for the tests defined in the child class
     */
    public OpExecutioner.ProfilingMode getProfilingMode(){
        return OpExecutioner.ProfilingMode.SCOPE_PANIC;
    }

    /**
     * Override this to set the datatype of the tests defined in the child class
     */
    public DataType getDataType(){
        return DataType.DOUBLE;
    }

    public DataType getDefaultFPDataType(){
        return getDataType();
    }

    @Before
    public void beforeTest(){
        log.info("{}.{}", getClass().getSimpleName(), name.getMethodName());
        Nd4j.getExecutioner().setProfilingMode(getProfilingMode());
        Nd4j.getExecutioner().setProfilingConfig(ProfilerConfig.builder().build());
        Nd4j.setDefaultDataTypes(getDataType(), getDefaultFPDataType());
        startTime = System.currentTimeMillis();
        threadCountBefore = ManagementFactory.getThreadMXBean().getThreadCount();
    }

    @After
    public void afterTest(){
        //Attempt to keep workspaces isolated between tests
        Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
        MemoryWorkspace currWS = Nd4j.getMemoryManager().getCurrentWorkspace();
        Nd4j.getMemoryManager().setCurrentWorkspace(null);
        if(currWS != null){
            //Not really safe to continue testing under this situation... other tests will likely fail with obscure
            // errors that are hard to track back to this
            log.error("Open workspace leaked from test! Exiting - {}, isOpen = {} - {}", currWS.getId(), currWS.isScopeActive(), currWS);
            System.exit(1);
        }

        StringBuilder sb = new StringBuilder();
        long maxPhys = Pointer.maxPhysicalBytes();
        long maxBytes = Pointer.maxBytes();
        long currPhys = Pointer.physicalBytes();
        long currBytes = Pointer.totalBytes();
        long jvmTotal = Runtime.getRuntime().totalMemory();
        long jvmMax = Runtime.getRuntime().maxMemory();
        int threadsAfter = ManagementFactory.getThreadMXBean().getThreadCount();
        long duration = System.currentTimeMillis() - startTime;
        sb.append(getClass().getSimpleName()).append(".").append(name.getMethodName())
                .append(": ").append(duration).append(" ms")
                .append(", threadCount: (").append(threadCountBefore).append("->").append(threadsAfter).append(")")
                .append(", jvmTotal=").append(jvmTotal)
                .append(", jvmMax=").append(jvmMax)
                .append(", totalBytes=").append(currBytes).append(", maxBytes=").append(maxBytes)
                .append(", currPhys=").append(currPhys).append(", maxPhys=").append(maxPhys);

        List<MemoryWorkspace> ws = Nd4j.getWorkspaceManager().getAllWorkspacesForCurrentThread();
        if(ws != null && ws.size() > 0){
            long currSize = 0;
            for(MemoryWorkspace w : ws){
                currSize += w.getCurrentSize();
            }
            if(currSize > 0){
                sb.append(", threadWSSize=").append(currSize)
                        .append(" (").append(ws.size()).append(" WSs)");
            }
        }

        Properties p = Nd4j.getExecutioner().getEnvironmentInformation();
        Object o = p.get("cuda.devicesInformation");
        if(o instanceof List){
            List<Map<String,Object>> l = (List<Map<String, Object>>) o;
            if(l.size() > 0) {
                sb.append(" [").append(l.size())
                        .append(" GPUs: ");
                for (int i = 0; i < l.size(); i++) {
                    Map<String,Object> m = l.get(i);
                    if(i > 0)
                        sb.append(",");
                    sb.append("(").append(m.get("cuda.freeMemory")).append(" free, ")
                            .append(m.get("cuda.totalMemory")).append(" total)");
                }
                sb.append("]");
            }
        }
        log.info(sb.toString());
    }
}
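For illustration only (not part of this commit): the afterTest() leak check above calls System.exit(1) if a workspace is still open when a test finishes, so test code that opens workspaces should keep them scoped, e.g. with try-with-resources. A minimal sketch, using a hypothetical workspace id:

import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class WorkspaceScopeSketch {
    public static void main(String[] args) {
        // Open and activate a workspace; closing it (via try-with-resources) before the test ends
        // keeps getCurrentWorkspace() clear, so afterTest() does not report a leaked workspace.
        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace("EXAMPLE_WS")) {
            INDArray scratch = Nd4j.create(10, 10);   // allocated inside the workspace scope
            scratch.addi(1.0);
        }
        // Outside the try block the scope is closed; an array that must outlive the scope
        // would need to be detached (e.g. via detach()) before the block ends.
        System.out.println("current workspace after scope: " + Nd4j.getMemoryManager().getCurrentWorkspace());
    }
}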

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.clustering.kdtree;
 import com.google.common.primitives.Doubles;
 import lombok.val;
+import org.deeplearning4j.clustering.BaseDL4JTest;
 import org.joda.time.Duration;
 import org.junit.Before;
 import org.junit.BeforeClass;
@@ -40,7 +41,7 @@ import static org.junit.Assert.assertTrue;
 /**
  * Created by agibsonccc on 1/1/15.
  */
-public class KDTreeTest {
+public class KDTreeTest extends BaseDL4JTest {
     private KDTree kdTree;

View File

@@ -17,6 +17,7 @@
 package org.deeplearning4j.clustering.kmeans;
 import org.apache.commons.lang3.time.StopWatch;
+import org.deeplearning4j.clustering.BaseDL4JTest;
 import org.deeplearning4j.clustering.algorithm.Distance;
 import org.deeplearning4j.clustering.cluster.*;
 import org.junit.Ignore;
@@ -33,7 +34,7 @@ import static org.junit.Assert.fail;
 /**
  * Created by agibsonccc on 7/2/17.
  */
-public class KMeansTest {
+public class KMeansTest extends BaseDL4JTest {
     @Test
     public void testKMeans() {

View File

@@ -16,6 +16,7 @@
 package org.deeplearning4j.clustering.lsh;
+import org.deeplearning4j.clustering.BaseDL4JTest;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Ignore;
@@ -31,7 +32,7 @@ import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
-public class RandomProjectionLSHTest {
+public class RandomProjectionLSHTest extends BaseDL4JTest {
     int hashLength = 31;
     int numTables = 2;

View File

@@ -16,6 +16,7 @@
 package org.deeplearning4j.clustering.quadtree;
+import org.deeplearning4j.clustering.BaseDL4JTest;
 import org.junit.Test;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.factory.Nd4j;
@@ -26,7 +27,7 @@ import static org.junit.Assert.assertTrue;
 /**
  * Created by agibsonccc on 1/2/15.
  */
-public class QuadTreeTest {
+public class QuadTreeTest extends BaseDL4JTest {
     @Test
     public void testQuadTree() {

View File

@@ -16,6 +16,7 @@
 package org.deeplearning4j.clustering.randomprojection;
+import org.deeplearning4j.clustering.BaseDL4JTest;
 import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
 import org.junit.Before;
 import org.junit.Test;
@@ -31,7 +32,7 @@ import java.util.List;
 import static org.junit.Assert.*;
-public class RPTreeTest {
+public class RPTreeTest extends BaseDL4JTest {
     @Before
     public void setUp() {

View File

@@ -16,13 +16,14 @@
 package org.deeplearning4j.clustering.randomprojection;
+import org.deeplearning4j.clustering.BaseDL4JTest;
 import org.junit.Test;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.factory.Nd4j;
 import static org.junit.Assert.assertEquals;
-public class RPUtilsTest {
+public class RPUtilsTest extends BaseDL4JTest {
     @Test
     public void testDistanceComputeBatch() {

View File

@@ -18,6 +18,7 @@ package org.deeplearning4j.clustering.sptree;
 import com.google.common.util.concurrent.AtomicDouble;
 import org.apache.commons.lang3.time.StopWatch;
+import org.deeplearning4j.clustering.BaseDL4JTest;
 import org.junit.Before;
 import org.junit.Ignore;
 import org.junit.Test;
@@ -33,7 +34,7 @@ import static org.junit.Assert.*;
 /**
  * @author Adam Gibson
  */
-public class SPTreeTest {
+public class SPTreeTest extends BaseDL4JTest {
     @Before
     public void setUp() {

Some files were not shown because too many files have changed in this diff