First round of runtime test improvements (#7875)

* Capsnet test runtime improvements

* Slow test speedups

* Next round of test speed improvements

* More test improvements

* Improve test speed

* Next round of test speedups

* Another round

* More test speedups

* Another round

* Another round of test speedups

* Another round of speedups...

* CuDNN test speedups + more tests extending BaseDL4JTest

* Minor fix + more BaseDL4JTest use in other modules
Branch: master
Author: Alex Black, 2019-06-13 20:40:40 +10:00 (committed by GitHub)
Parent: b5f0ec072f
Commit: 32e5cc1945
188 changed files with 2558 additions and 1531 deletions
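A change repeated across the test files below replaces the hand-rolled one-hot label loop (labels.putScalar(i, r.nextInt(nOut), 1.0) per row) with a single TestUtils.randomOneHot(mb, nOut) call. As a rough sketch of what such a helper does, using only the ND4J calls already visible in these diffs rather than the repository's actual TestUtils code:

import java.util.Random;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class OneHotSketch {

    // Hypothetical stand-in for TestUtils.randomOneHot: one random class per row.
    public static INDArray randomOneHot(int rows, int nOut, long seed) {
        Random r = new Random(seed);
        INDArray labels = Nd4j.zeros(rows, nOut);         // [rows, nOut] array of zeros
        for (int i = 0; i < rows; i++) {
            labels.putScalar(i, r.nextInt(nOut), 1.0);    // set one random column to 1.0 per row
        }
        return labels;
    }

    public static void main(String[] args) {
        System.out.println(randomOneHot(3, 2, 12345));    // e.g. 3 examples, 2 classes
    }
}

Using one helper keeps the label construction out of every test loop; the real TestUtils method may additionally handle data types and seeding differently.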

File: AttentionLayerTest.java

@ -17,12 +17,14 @@
package org.deeplearning4j.gradientcheck;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.graph.AttentionVertex;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
@ -44,7 +46,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
@Rule
public ExpectedException exceptionRule = ExpectedException.none();
private static final boolean PRINT_RESULTS = false;
private static final boolean PRINT_RESULTS = true;
private static final boolean RETURN_ON_FIRST_FAILURE = false;
private static final double DEFAULT_EPS = 1e-6;
private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
@ -53,19 +55,15 @@ public class AttentionLayerTest extends BaseDL4JTest {
@Test
public void testSelfAttentionLayer() {
int nIn = 3;
int nOut = 5;
int nOut = 2;
int tsLength = 4;
int layerSize = 8;
int layerSize = 4;
Random r = new Random(12345);
for (int mb : new int[]{1, 2, 3}) {
for (int mb : new int[]{1, 3}) {
for (boolean inputMask : new boolean[]{false, true}) {
for (boolean projectInput : new boolean[]{false, true}) {
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
INDArray labels = Nd4j.create(mb, nOut);
for (int i = 0; i < mb; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{mb, nIn, tsLength});
INDArray labels = TestUtils.randomOneHot(mb, nOut);
String maskType = (inputMask ? "inputMask" : "none");
INDArray inMask = null;
@ -94,7 +92,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
.list()
.layer(new LSTM.Builder().nOut(layerSize).build())
.layer( projectInput ?
new SelfAttentionLayer.Builder().nOut(8).nHeads(2).projectInput(true).build()
new SelfAttentionLayer.Builder().nOut(4).nHeads(2).projectInput(true).build()
: new SelfAttentionLayer.Builder().nHeads(1).projectInput(false).build()
)
.layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build())
@ -107,7 +105,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
net.init();
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, true, 100);
assertTrue(name, gradOK);
}
}
@ -117,20 +115,16 @@ public class AttentionLayerTest extends BaseDL4JTest {
@Test
public void testLearnedSelfAttentionLayer() {
int nIn = 3;
int nOut = 5;
int nOut = 2;
int tsLength = 4;
int layerSize = 8;
int numQueries = 6;
int layerSize = 4;
int numQueries = 3;
Random r = new Random(12345);
for (boolean inputMask : new boolean[]{false, true}) {
for (int mb : new int[]{3, 2, 1}) {
for (int mb : new int[]{3, 1}) {
for (boolean projectInput : new boolean[]{false, true}) {
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
INDArray labels = Nd4j.create(mb, nOut);
for (int i = 0; i < mb; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{mb, nIn, tsLength});
INDArray labels = TestUtils.randomOneHot(mb, nOut);
String maskType = (inputMask ? "inputMask" : "none");
INDArray inMask = null;
@ -159,7 +153,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
.list()
.layer(new LSTM.Builder().nOut(layerSize).build())
.layer( projectInput ?
new LearnedSelfAttentionLayer.Builder().nOut(8).nHeads(2).nQueries(numQueries).projectInput(true).build()
new LearnedSelfAttentionLayer.Builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build()
: new LearnedSelfAttentionLayer.Builder().nHeads(1).nQueries(numQueries).projectInput(false).build()
)
.layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build())
@ -172,7 +166,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
net.init();
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, true, 100);
assertTrue(name, gradOK);
}
}
@ -182,10 +176,10 @@ public class AttentionLayerTest extends BaseDL4JTest {
@Test
public void testLearnedSelfAttentionLayer_differentMiniBatchSizes() {
int nIn = 3;
int nOut = 5;
int nOut = 2;
int tsLength = 4;
int layerSize = 8;
int numQueries = 6;
int layerSize = 4;
int numQueries = 3;
Random r = new Random(12345);
for (boolean inputMask : new boolean[]{false, true}) {
@ -199,7 +193,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
.list()
.layer(new LSTM.Builder().nOut(layerSize).build())
.layer( projectInput ?
new LearnedSelfAttentionLayer.Builder().nOut(8).nHeads(2).nQueries(numQueries).projectInput(true).build()
new LearnedSelfAttentionLayer.Builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build()
: new LearnedSelfAttentionLayer.Builder().nHeads(1).nQueries(numQueries).projectInput(false).build()
)
.layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build())
@ -210,17 +204,14 @@ public class AttentionLayerTest extends BaseDL4JTest {
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
for (int mb : new int[]{3, 2, 1}) {
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
INDArray labels = Nd4j.create(mb, nOut);
for (int i = 0; i < mb; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
for (int mb : new int[]{3, 1}) {
INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{mb, nIn, tsLength});
INDArray labels = TestUtils.randomOneHot(mb, nOut);
String maskType = (inputMask ? "inputMask" : "none");
INDArray inMask = null;
if (inputMask) {
inMask = Nd4j.ones(mb, tsLength);
inMask = Nd4j.ones(DataType.INT, mb, tsLength);
for (int i = 0; i < mb; i++) {
int firstMaskedStep = tsLength - 1 - i;
if (firstMaskedStep == 0) {
@ -236,7 +227,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
System.out.println("Starting test: " + name);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, true, 100);
assertTrue(name, gradOK);
}
}
@ -282,20 +273,15 @@ public class AttentionLayerTest extends BaseDL4JTest {
@Test
public void testRecurrentAttentionLayer() {
int nIn = 9;
int nOut = 5;
int tsLength = 4;
int layerSize = 8;
int nIn = 4;
int nOut = 2;
int tsLength = 3;
int layerSize = 3;
Random r = new Random(12345);
for (int mb : new int[]{3, 2, 1}) {
for (int mb : new int[]{3, 1}) {
for (boolean inputMask : new boolean[]{true, false}) {
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
INDArray labels = Nd4j.create(mb, nOut);
for (int i = 0; i < mb; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{mb, nIn, tsLength});
INDArray labels = TestUtils.randomOneHot(mb, nOut);
String maskType = (inputMask ? "inputMask" : "none");
INDArray inMask = null;
@ -335,8 +321,7 @@ public class AttentionLayerTest extends BaseDL4JTest {
//System.out.println("Original");
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, false, -1, null
);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, true, 100, null);
assertTrue(name, gradOK);
}
}
@ -345,19 +330,16 @@ public class AttentionLayerTest extends BaseDL4JTest {
@Test
public void testAttentionVertex() {
int nIn = 3;
int nOut = 5;
int tsLength = 4;
int layerSize = 8;
int nOut = 2;
int tsLength = 3;
int layerSize = 3;
Random r = new Random(12345);
for (boolean inputMask : new boolean[]{false, true}) {
for (int mb : new int[]{3, 2, 1}) {
for (int mb : new int[]{3, 1}) {
for (boolean projectInput : new boolean[]{false, true}) {
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
INDArray labels = Nd4j.create(mb, nOut);
for (int i = 0; i < mb; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{mb, nIn, tsLength});
INDArray labels = TestUtils.randomOneHot(mb, nOut);
String maskType = (inputMask ? "inputMask" : "none");
INDArray inMask = null;
@ -385,13 +367,13 @@ public class AttentionLayerTest extends BaseDL4JTest {
.weightInit(WeightInit.XAVIER)
.graphBuilder()
.addInputs("input")
.addLayer("lstmKeys", new LSTM.Builder().nOut(layerSize).build(), "input")
.addLayer("lstmQueries", new LSTM.Builder().nOut(layerSize).build(), "input")
.addLayer("lstmValues", new LSTM.Builder().nOut(layerSize).build(), "input")
.addLayer("rnnKeys", new SimpleRnn.Builder().nOut(layerSize).build(), "input")
.addLayer("rnnQueries", new SimpleRnn.Builder().nOut(layerSize).build(), "input")
.addLayer("rnnValues", new SimpleRnn.Builder().nOut(layerSize).build(), "input")
.addVertex("attention",
projectInput ?
new AttentionVertex.Builder().nOut(8).nHeads(2).projectInput(true).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build()
: new AttentionVertex.Builder().nOut(8).nHeads(1).projectInput(false).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build(), "lstmQueries", "lstmKeys", "lstmValues")
new AttentionVertex.Builder().nOut(4).nHeads(2).projectInput(true).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build()
: new AttentionVertex.Builder().nOut(3).nHeads(1).projectInput(false).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build(), "rnnQueries", "rnnKeys", "rnnValues")
.addLayer("pooling", new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build(), "attention")
.addLayer("output", new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "pooling")
.setOutputs("output")
@ -412,19 +394,16 @@ public class AttentionLayerTest extends BaseDL4JTest {
@Test
public void testAttentionVertexSameInput() {
int nIn = 3;
int nOut = 5;
int nOut = 2;
int tsLength = 4;
int layerSize = 8;
int layerSize = 4;
Random r = new Random(12345);
for (boolean inputMask : new boolean[]{false, true}) {
for (int mb : new int[]{3, 2, 1}) {
for (int mb : new int[]{3, 1}) {
for (boolean projectInput : new boolean[]{false, true}) {
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
INDArray labels = Nd4j.create(mb, nOut);
for (int i = 0; i < mb; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
INDArray labels = TestUtils.randomOneHot(mb, nOut);
String maskType = (inputMask ? "inputMask" : "none");
INDArray inMask = null;
@ -452,11 +431,11 @@ public class AttentionLayerTest extends BaseDL4JTest {
.weightInit(WeightInit.XAVIER)
.graphBuilder()
.addInputs("input")
.addLayer("lstm", new LSTM.Builder().nOut(layerSize).build(), "input")
.addLayer("rnn", new SimpleRnn.Builder().activation(Activation.TANH).nOut(layerSize).build(), "input")
.addVertex("attention",
projectInput ?
new AttentionVertex.Builder().nOut(8).nHeads(2).projectInput(true).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build()
: new AttentionVertex.Builder().nOut(8).nHeads(1).projectInput(false).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build(), "lstm", "lstm", "lstm")
new AttentionVertex.Builder().nOut(4).nHeads(2).projectInput(true).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build()
: new AttentionVertex.Builder().nOut(4).nHeads(1).projectInput(false).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build(), "rnn", "rnn", "rnn")
.addLayer("pooling", new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build(), "attention")
.addLayer("output", new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "pooling")
.setOutputs("output")
@ -467,7 +446,8 @@ public class AttentionLayerTest extends BaseDL4JTest {
net.init();
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[]{in}, new INDArray[]{labels}, inMask != null ? new INDArray[]{inMask} : null, null);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[]{in},
new INDArray[]{labels}, inMask != null ? new INDArray[]{inMask} : null, null);
assertTrue(name, gradOK);
}
}
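The AttentionLayerTest changes above extend each GradientCheckUtil.checkGradients(...) call with two trailing arguments (e.g. true, 100; other files in this commit use 25, 50, 128, 160, 256 or 512). Judging by the comments added later in this commit ("Most params are in output layer, only these should be skipped with this threshold"), these arguments appear to restrict the numerical check to a bounded subset of parameters, which is the expensive part of a gradient check. The stand-alone sketch below illustrates that idea with a plain finite-difference check; it is not the actual GradientCheckUtil implementation, and every name in it is illustrative.

import java.util.Random;
import java.util.function.ToDoubleFunction;

public class SubsetGradCheckSketch {

    // Check at most maxChecks randomly chosen parameters with central finite differences.
    static boolean checkSubset(double[] params, double[] analyticGrad,
                               ToDoubleFunction<double[]> loss,
                               int maxChecks, double eps, double maxRelError, long seed) {
        Random r = new Random(seed);
        int checks = Math.min(maxChecks, params.length);
        for (int c = 0; c < checks; c++) {
            int i = r.nextInt(params.length);                  // random parameter index
            double orig = params[i];
            params[i] = orig + eps;  double plus  = loss.applyAsDouble(params);
            params[i] = orig - eps;  double minus = loss.applyAsDouble(params);
            params[i] = orig;                                  // restore the parameter
            double numeric = (plus - minus) / (2 * eps);
            double denom = Math.max(Math.abs(numeric), Math.abs(analyticGrad[i]));
            double relError = denom == 0.0 ? 0.0 : Math.abs(numeric - analyticGrad[i]) / denom;
            if (relError > maxRelError) return false;          // fail like assertTrue(gradOK) would
        }
        return true;
    }

    public static void main(String[] args) {
        double[] p = {1.0, -2.0, 0.5};
        // loss = 0.5 * ||p||^2, so the analytic gradient is just p itself
        ToDoubleFunction<double[]> loss = x -> 0.5 * (x[0] * x[0] + x[1] * x[1] + x[2] * x[2]);
        System.out.println(checkSubset(p, p.clone(), loss, 2, 1e-6, 1e-3, 12345));
    }
}

Checking a bounded random subset trades exhaustiveness for runtime, which matches the stated goal of this commit.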

File: BNGradientCheckTest.java

@ -76,7 +76,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
INDArray input = ds.getFeatures();
INDArray labels = ds.getLabels();
for(boolean useLogStd : new boolean[]{true, false}) {
for (boolean useLogStd : new boolean[]{true, false}) {
MultiLayerConfiguration.Builder builder =
new NeuralNetConfiguration.Builder().updater(new NoOp())
@ -117,14 +117,14 @@ public class BNGradientCheckTest extends BaseDL4JTest {
int depth = 1;
int hw = 4;
int nOut = 4;
INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw});
INDArray labels = Nd4j.zeros(minibatch, nOut);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
for(boolean useLogStd : new boolean[]{true, false}) {
for (boolean useLogStd : new boolean[]{true, false}) {
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp()).seed(12345L)
@ -158,20 +158,14 @@ public class BNGradientCheckTest extends BaseDL4JTest {
}
@Test
public void testGradientBNWithCNNandSubsamplingcCnfigurableProfiler() {
Nd4j.getExecutioner().setProfilingConfig(ProfilerConfig.builder()
.notOptimalArguments(true)
.notOptimalTAD(true)
.checkForINF(true)
.checkForNAN(true)
.checkElapsedTime(true)
.stackTrace(true)
.checkWorkspaces(true)
.build());
public void testGradientBNWithCNNandSubsampling() {
//Parameterized test, testing combinations of:
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
// (d) l1 and l2 values
Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.IDENTITY};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
boolean[] characteristic = {true}; //If true: run some backprop steps first
LossFunctions.LossFunction[] lossFunctions =
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
@ -181,24 +175,24 @@ public class BNGradientCheckTest extends BaseDL4JTest {
double[] l1vals = {0.0, 0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
Nd4j.getRandom().setSeed(12345);
int minibatch = 10;
int minibatch = 4;
int depth = 2;
int hw = 5;
int nOut = 3;
INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw}).muli(5).subi(2.5);
INDArray labels = Nd4j.zeros(minibatch, nOut);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
int nOut = 2;
INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw}).muli(5).subi(2.5);
INDArray labels = TestUtils.randomOneHot(minibatch, nOut);
DataSet ds = new DataSet(input, labels);
for(boolean useLogStd : new boolean[]{true, false}) {
Random rng = new Random(12345);
for (boolean useLogStd : new boolean[]{true, false}) {
for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int j = 0; j < l2vals.length; j++) {
//Skip 2 of every 3 tests: from 24 cases to 8, still with decent coverage
if (rng.nextInt(3) != 0)
continue;
LossFunctions.LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
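The hunk above adds a fixed-seed Random and an "if (rng.nextInt(3) != 0) continue;" guard inside the nested parameter loops, so only about one in three combinations is actually gradient-checked while the selection stays deterministic across runs (the comment cites 24 cases reduced to 8). A minimal, self-contained illustration of the pattern; the loop bounds and variable names here are made up, not the test's real ones:

import java.util.Random;

public class SkipSketch {
    public static void main(String[] args) {
        Random rng = new Random(12345);                    // fixed seed => same subset every run
        int run = 0, total = 0;
        for (int a = 0; a < 2; a++) {                      // e.g. useLogStd
            for (int b = 0; b < 3; b++) {                  // e.g. activation function
                for (int c = 0; c < 4; c++) {              // e.g. l1/l2 combination
                    total++;
                    if (rng.nextInt(3) != 0) continue;     // keep roughly 1 of every 3 combinations
                    run++;
                    // ... build the network for (a, b, c) and run the gradient check here ...
                }
            }
        }
        System.out.println("ran " + run + " of " + total + " combinations");
    }
}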
@ -260,7 +254,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
//However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "3_mean", "3_var", "1_log10stdev", "3_log10stdev"));
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, excludeParams);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 25, excludeParams); //Most params are in output layer, only these should be skipped with this threshold
assertTrue(gradOK);
TestUtils.testModelSerialization(mln);
@ -269,117 +263,6 @@ public class BNGradientCheckTest extends BaseDL4JTest {
}
}
}
OpProfiler.getInstance().printOutDashboard();
}
@Test
public void testGradientBNWithCNNandSubsampling() {
Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.NAN_PANIC);
//Parameterized test, testing combinations of:
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
// (d) l1 and l2 values
Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.IDENTITY};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
LossFunctions.LossFunction[] lossFunctions =
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
double[] l2vals = {0.0, 0.1, 0.1};
double[] l1vals = {0.0, 0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
Nd4j.getRandom().setSeed(12345);
int minibatch = 10;
int depth = 2;
int hw = 5;
int nOut = 3;
INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw}).muli(5).subi(2.5);
INDArray labels = Nd4j.zeros(minibatch, nOut);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
DataSet ds = new DataSet(input, labels);
for(boolean useLogStd : new boolean[]{true, false}) {
for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int j = 0; j < l2vals.length; j++) {
LossFunctions.LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.l2(l2vals[j])
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
.updater(new NoOp())
.dist(new UniformDistribution(-2, 2)).seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
.activation(afn).build())
.layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).build())
.layer(2, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
.kernelSize(2, 2).stride(1, 1).build())
.layer(3, new BatchNormalization())
.layer(4, new ActivationLayer.Builder().activation(afn).build())
.layer(5, new OutputLayer.Builder(lf).activation(outputActivation).nOut(nOut)
.build())
.setInputType(InputType.convolutional(hw, hw, depth));
MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
String name = new Object() {
}.getClass().getEnclosingMethod().getName();
System.out.println("Num params: " + mln.numParams());
if (doLearningFirst) {
//Run a number of iterations of learning
mln.setInput(ds.getFeatures());
mln.setLabels(ds.getLabels());
mln.computeGradientAndScore();
double scoreBefore = mln.score();
for (int k = 0; k < 20; k++)
mln.fit(ds);
mln.computeGradientAndScore();
double scoreAfter = mln.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = name
+ " - score did not (sufficiently) decrease during learning - activationFn="
+ afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
+ ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
+ ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
}
if (PRINT_RESULTS) {
System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", doLearningFirst="
+ doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
for (int k = 0; k < mln.getnLayers(); k++)
System.out.println("Layer " + k + " # params: " + mln.getLayer(k).numParams());
}
//Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
//i.e., runningMean = decay * runningMean + (1-decay) * batchMean
//However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "3_mean", "3_var", "1_log10stdev", "3_log10stdev"));
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, excludeParams);
assertTrue(gradOK);
TestUtils.testModelSerialization(mln);
}
}
}
}
}
OpProfiler.getInstance().printOutDashboard();
}
@ -390,21 +273,21 @@ public class BNGradientCheckTest extends BaseDL4JTest {
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
// (d) l1 and l2 values
Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.IDENTITY};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
Activation[] activFns = {Activation.TANH, Activation.IDENTITY};
boolean[] characteristic = {true}; //If true: run some backprop steps first
LossFunctions.LossFunction[] lossFunctions =
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
double[] l2vals = {0.0, 0.1, 0.1};
double[] l1vals = {0.0, 0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
double[] l2vals = {0.0, 0.1};
double[] l1vals = {0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
Nd4j.getRandom().setSeed(12345);
int minibatch = 10;
int nIn = 5;
int nOut = 3;
INDArray input = Nd4j.rand(new int[] {minibatch, nIn});
INDArray input = Nd4j.rand(new int[]{minibatch, nIn});
INDArray labels = Nd4j.zeros(minibatch, nOut);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
@ -413,7 +296,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
DataSet ds = new DataSet(input, labels);
for(boolean useLogStd : new boolean[]{true, false}) {
for (boolean useLogStd : new boolean[]{true, false}) {
for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
@ -498,7 +381,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
INDArray input = ds.getFeatures();
INDArray labels = ds.getLabels();
for(boolean useLogStd : new boolean[]{true, false}) {
for (boolean useLogStd : new boolean[]{true, false}) {
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
.dataType(DataType.DOUBLE)
.seed(12345L)
@ -537,14 +420,14 @@ public class BNGradientCheckTest extends BaseDL4JTest {
int depth = 1;
int hw = 4;
int nOut = 4;
INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw});
INDArray labels = Nd4j.zeros(minibatch, nOut);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
for(boolean useLogStd : new boolean[]{true, false}) {
for (boolean useLogStd : new boolean[]{true, false}) {
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp())
.dataType(DataType.DOUBLE)
.seed(12345L)
@ -588,7 +471,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
int minibatchSize = 3;
for(boolean useLogStd : new boolean[]{true, false}) {
for (boolean useLogStd : new boolean[]{true, false}) {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).updater(new NoOp())
.dataType(DataType.DOUBLE)
@ -630,22 +513,21 @@ public class BNGradientCheckTest extends BaseDL4JTest {
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
// (d) l1 and l2 values
Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.IDENTITY};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
Activation[] activFns = {Activation.TANH, Activation.IDENTITY};
boolean doLearningFirst = true;
LossFunctions.LossFunction[] lossFunctions =
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD};
Activation[] outputActivations = {Activation.SOFTMAX}; //i.e., lossFunctions[i] used with outputActivations[i] here
double[] l2vals = {0.0, 0.1, 0.1};
double[] l1vals = {0.0, 0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
double[] l2vals = {0.0, 0.1};
double[] l1vals = {0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j]
Nd4j.getRandom().setSeed(12345);
int minibatch = 10;
int depth = 2;
int hw = 5;
int nOut = 3;
INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw});
INDArray labels = Nd4j.zeros(minibatch, nOut);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
@ -654,75 +536,73 @@ public class BNGradientCheckTest extends BaseDL4JTest {
DataSet ds = new DataSet(input, labels);
for(boolean useLogStd : new boolean[]{true, false}) {
for (boolean useLogStd : new boolean[]{true, false}) {
for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int j = 0; j < l2vals.length; j++) {
LossFunctions.LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
for (int i = 0; i < lossFunctions.length; i++) {
for (int j = 0; j < l2vals.length; j++) {
LossFunctions.LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
.updater(new NoOp())
.dist(new UniformDistribution(-2, 2)).seed(12345L).graphBuilder()
.addInputs("in")
.addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
.activation(afn).build(), "in")
.addLayer("1", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "0")
.addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
.kernelSize(2, 2).stride(1, 1).build(), "1")
.addLayer("3", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "2")
.addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3")
.addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation)
.nOut(nOut).build(), "4")
.setOutputs("5").setInputTypes(InputType.convolutional(hw, hw, depth))
.build();
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
.updater(new NoOp())
.dist(new UniformDistribution(-2, 2)).seed(12345L).graphBuilder()
.addInputs("in")
.addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
.activation(afn).build(), "in")
.addLayer("1", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "0")
.addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
.kernelSize(2, 2).stride(1, 1).build(), "1")
.addLayer("3", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "2")
.addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3")
.addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation)
.nOut(nOut).build(), "4")
.setOutputs("5").setInputTypes(InputType.convolutional(hw, hw, depth))
.build();
ComputationGraph net = new ComputationGraph(conf);
net.init();
String name = new Object() {
}.getClass().getEnclosingMethod().getName();
ComputationGraph net = new ComputationGraph(conf);
net.init();
String name = new Object() {
}.getClass().getEnclosingMethod().getName();
if (doLearningFirst) {
//Run a number of iterations of learning
net.setInput(0, ds.getFeatures());
net.setLabels(ds.getLabels());
net.computeGradientAndScore();
double scoreBefore = net.score();
for (int k = 0; k < 20; k++)
net.fit(ds);
net.computeGradientAndScore();
double scoreAfter = net.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = name
+ " - score did not (sufficiently) decrease during learning - activationFn="
+ afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
+ ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
+ ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
}
if (PRINT_RESULTS) {
System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", doLearningFirst="
+ doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
for (int k = 0; k < net.getNumLayers(); k++)
System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams());
}
//Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
//i.e., runningMean = decay * runningMean + (1-decay) * batchMean
//However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "3_mean", "3_var", "1_log10stdev", "3_log10stdev"));
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
new INDArray[]{input}, new INDArray[]{labels}, null, null, excludeParams);
assertTrue(gradOK);
TestUtils.testModelSerialization(net);
if (doLearningFirst) {
//Run a number of iterations of learning
net.setInput(0, ds.getFeatures());
net.setLabels(ds.getLabels());
net.computeGradientAndScore();
double scoreBefore = net.score();
for (int k = 0; k < 20; k++)
net.fit(ds);
net.computeGradientAndScore();
double scoreAfter = net.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = name
+ " - score did not (sufficiently) decrease during learning - activationFn="
+ afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
+ ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore
+ ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
}
if (PRINT_RESULTS) {
System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", doLearningFirst="
+ doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
for (int k = 0; k < net.getNumLayers(); k++)
System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams());
}
//Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc
//i.e., runningMean = decay * runningMean + (1-decay) * batchMean
//However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter"
Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "3_mean", "3_var", "1_log10stdev", "3_log10stdev"));
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
new INDArray[]{input}, new INDArray[]{labels}, null, null, excludeParams);
assertTrue(gradOK);
TestUtils.testModelSerialization(net);
}
}
}
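The excludeParams sets above ("1_mean", "1_var", "3_mean", "3_var", ...) skip batch-normalization state because, as the in-code comments note, the stored mean and variance are running statistics updated as runningMean = decay * runningMean + (1 - decay) * batchMean rather than trainable parameters, so their finite-difference gradient is zero. A toy illustration of that exponential moving average; the decay value and batch statistics below are arbitrary examples:

public class RunningStatsSketch {
    public static void main(String[] args) {
        double decay = 0.9;                                          // arbitrary example value
        double runningMean = 0.0, runningVar = 1.0;
        double[][] batches = {{1.0, 3.0}, {2.0, 2.0}, {0.0, 4.0}};   // toy (batchMean, batchVar) pairs
        for (double[] b : batches) {
            runningMean = decay * runningMean + (1 - decay) * b[0];  // the update the comments cite
            runningVar  = decay * runningVar  + (1 - decay) * b[1];
        }
        // The training-time forward pass does not read these running values, so a
        // finite-difference "gradient" for them is 0 and they are excluded from the check.
        System.out.printf("runningMean=%.4f runningVar=%.4f%n", runningMean, runningVar);
    }
}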

File: CNN3DGradientCheckTest.java

@ -151,7 +151,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest {
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE, input, labels);
RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 128);
assertTrue(msg, gradOK);
@ -255,7 +255,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest {
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE, input, labels);
RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 512);
assertTrue(msg, gradOK);

File: CNNGradientCheckTest.java

@ -142,12 +142,6 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
LossFunctions.LossFunction[] lossFunctions =
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
DataSet ds = new IrisDataSetIterator(150, 150).next();
ds.normalizeZeroMeanZeroUnitVariance();
@ -159,72 +153,74 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
double[] l1vals = {0.0, 0.0, 0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first
for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunctions.LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
LossFunctions.LossFunction[] lossFunctions =
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.l2(l2).l1(l1).l2Bias(biasL2[k]).l1Bias(biasL1[k])
.optimizationAlgo(
OptimizationAlgorithm.CONJUGATE_GRADIENT)
.seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
.weightInit(WeightInit.XAVIER).activation(afn)
.updater(new NoOp()).build())
.layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
.weightInit(WeightInit.XAVIER).updater(new NoOp()).build())
for( int i=0; i<l2vals.length; i++ ){
Activation afn = activFns[i];
boolean doLearningFirst = characteristic[i];
LossFunctions.LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
double l2 = l2vals[i];
double l1 = l1vals[i];
.setInputType(InputType.convolutionalFlat(1, 4, 1));
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
.optimizationAlgo(
OptimizationAlgorithm.CONJUGATE_GRADIENT)
.seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
.weightInit(WeightInit.XAVIER).activation(afn)
.updater(new NoOp()).build())
.layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
.weightInit(WeightInit.XAVIER).updater(new NoOp()).build())
MultiLayerConfiguration conf = builder.build();
.setInputType(InputType.convolutionalFlat(1, 4, 1));
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
String testName = new Object() {
}.getClass().getEnclosingMethod().getName();
MultiLayerConfiguration conf = builder.build();
if (doLearningFirst) {
//Run a number of iterations of learning
mln.setInput(ds.getFeatures());
mln.setLabels(ds.getLabels());
mln.computeGradientAndScore();
double scoreBefore = mln.score();
for (int j = 0; j < 10; j++)
mln.fit(ds);
mln.computeGradientAndScore();
double scoreAfter = mln.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = testName
+ "- score did not (sufficiently) decrease during learning - activationFn="
+ afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
+ ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
+ ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
}
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
String testName = new Object() {
}.getClass().getEnclosingMethod().getName();
if (PRINT_RESULTS) {
System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", doLearningFirst="
+ doLearningFirst);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
TestUtils.testModelSerialization(mln);
}
}
if (doLearningFirst) {
//Run a number of iterations of learning
mln.setInput(ds.getFeatures());
mln.setLabels(ds.getLabels());
mln.computeGradientAndScore();
double scoreBefore = mln.score();
for (int j = 0; j < 10; j++)
mln.fit(ds);
mln.computeGradientAndScore();
double scoreAfter = mln.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = testName
+ "- score did not (sufficiently) decrease during learning - activationFn="
+ afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
+ ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
+ ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
}
if (PRINT_RESULTS) {
System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", doLearningFirst="
+ doLearningFirst);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
TestUtils.testModelSerialization(mln);
}
}
@ -369,56 +365,43 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int[] padding = {0, 0};
int size = 2;
String[] activations = {"sigmoid", "tanh"};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
for (String afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
}
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.list().layer(new ConvolutionLayer.Builder(kernel,
stride, padding).nIn(inputDepth)
.nOut(3).build())//output: (5-2+0)/1+1 = 4
.layer(new Upsampling2D.Builder().size(size).build()) //output: 4*2 =8 -> 8x8x3
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(8 * 8 * 3)
.nOut(4).build())
.setInputType(InputType.convolutionalFlat(height, width,
inputDepth))
.build();
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.list().layer(new ConvolutionLayer.Builder(kernel,
stride, padding).nIn(inputDepth)
.nOut(3).build())//output: (5-2+0)/1+1 = 4
.layer(new Upsampling2D.Builder().size(size).build()) //output: 4*2 =8 -> 8x8x3
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(8 * 8 * 3)
.nOut(4).build())
.setInputType(InputType.convolutionalFlat(height, width,
inputDepth))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg = "Upsampling - minibatch=" + minibatchSize;
String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
+ afn;
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
@ -695,60 +678,56 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
public void testCnnSamePaddingMode() {
int nOut = 2;
int[] minibatchSizes = {1, 3};
int[] minibatchSizes = {1, 3, 3, 2, 1, 2};
int[] heights = new int[]{4, 5, 6, 5, 4, 4}; //Same padding mode: insensitive to exact input size...
int[] kernelSizes = new int[]{2, 3, 2, 3, 2, 3};
int[] inputDepths = {1, 2, 4, 3, 2, 3};
int width = 5;
int[] heights = new int[]{4, 5, 6}; //Same padding mode: insensitive to exact input size...
int[] kernelSizes = new int[]{2, 3};
int[] inputDepths = {1, 2, 4};
Nd4j.getRandom().setSeed(12345);
for (int inputDepth : inputDepths) {
for (int minibatchSize : minibatchSizes) {
for (int height : heights) {
for (int k : kernelSizes) {
for( int i=0; i<minibatchSizes.length; i++ ){
int inputDepth = inputDepths[i];
int minibatchSize = minibatchSizes[i];
int height = heights[i];
int k = kernelSizes[i];
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
}
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH).convolutionMode(Same).list()
.layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k)
.stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build())
.layer(1, new SubsamplingLayer.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k)
.stride(1, 1).padding(0, 0).build())
.layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k)
.stride(1, 1).padding(0, 0).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH).convolutionMode(Same).list()
.layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k)
.stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build())
.layer(1, new SubsamplingLayer.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k)
.stride(1, 1).padding(0, 0).build())
.layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k)
.stride(1, 1).padding(0, 0).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
for (int i = 0; i < net.getLayers().length; i++) {
System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams());
}
String msg = "Minibatch=" + minibatchSize + ", inDepth=" + inputDepth + ", height=" + height
+ ", kernelSize=" + k;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
for (int j = 0; j < net.getLayers().length; j++) {
System.out.println("nParams, layer " + j + ": " + net.getLayer(j).numParams());
}
String msg = "Minibatch=" + minibatchSize + ", inDepth=" + inputDepth + ", height=" + height
+ ", kernelSize=" + k;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
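The testCnnSamePaddingMode hunk above notes that Same padding mode is "insensitive to exact input size", which is why heights 4, 5 and 6 can be mixed freely with different kernel sizes. Assuming DL4J's Same and Truncate modes follow the usual SAME/valid conventions, the spatial output size is ceil(in / stride) for Same and floor((in - k) / stride) + 1 for Truncate; a quick check of both formulas:

public class PaddingSizeSketch {
    static int same(int in, int stride) { return (in + stride - 1) / stride; }          // ceil(in / stride)
    static int truncate(int in, int k, int stride) { return (in - k) / stride + 1; }    // floor((in - k) / stride) + 1

    public static void main(String[] args) {
        int stride = 1, k = 3;
        for (int in : new int[]{4, 5, 6}) {
            System.out.println("in=" + in + "  same=" + same(in, stride)
                    + "  truncate=" + truncate(in, k, stride));
        }
    }
}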
@ -809,7 +788,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
labels);
labels, null, null, true, 128);
assertTrue(msg, gradOK);
@ -827,68 +806,66 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
Nd4j.getRandom().setSeed(12345);
int nOut = 4;
int[] minibatchSizes = {1, 3};
int width = 6;
int height = 6;
int[] inputDepths = {1, 3};
int[] kernel = {2, 2};
int[] stride = {1, 1};
int[] padding = {0, 0};
int[] minibatchSizes = {1, 3, 2};
int[] inputDepths = {1, 3, 2};
int[][] zeroPadLayer = new int[][]{{0, 0, 0, 0}, {1, 1, 0, 0}, {2, 2, 2, 2}};
for (int inputDepth : inputDepths) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(new int[]{minibatchSize, inputDepth, height, width});
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
}
for (int[] zeroPad : zeroPadLayer) {
for( int i=0; i<minibatchSizes.length; i++ ){
int minibatchSize = minibatchSizes[i];
int inputDepth = inputDepths[i];
int[] zeroPad = zeroPadLayer[i];
INDArray input = Nd4j.rand(DataType.DOUBLE, new int[]{minibatchSize, inputDepth, height, width});
INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder().updater(new NoOp())
.dataType(DataType.DOUBLE)
.dist(new NormalDistribution(0, 1)).list()
.layer(0, new ConvolutionLayer.Builder(kernel, stride, padding)
.nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5
.layer(1, new ZeroPaddingLayer.Builder(zeroPad).build()).layer(2,
new ConvolutionLayer.Builder(kernel, stride,
padding).nIn(3).nOut(3).build())//output: (6-2+0)/1+1 = 5
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(4).build())
.setInputType(InputType.convolutional(height, width, inputDepth))
.build();
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder().updater(new NoOp())
.dataType(DataType.DOUBLE)
.dist(new NormalDistribution(0, 1)).list()
.layer(0, new ConvolutionLayer.Builder(kernel, stride, padding)
.nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5
.layer(1, new ZeroPaddingLayer.Builder(zeroPad).build()).layer(2,
new ConvolutionLayer.Builder(kernel, stride,
padding).nIn(3).nOut(3).build())//output: (6-2+0)/1+1 = 5
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(4).build())
.setInputType(InputType.convolutional(height, width, inputDepth))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
//Check zero padding activation shape
org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer zpl =
(org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer) net.getLayer(1);
val expShape = new long[]{minibatchSize, inputDepth, height + zeroPad[0] + zeroPad[1],
width + zeroPad[2] + zeroPad[3]};
INDArray out = zpl.activate(input, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(expShape, out.shape());
//Check zero padding activation shape
org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer zpl =
(org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer) net.getLayer(1);
val expShape = new long[]{minibatchSize, inputDepth, height + zeroPad[0] + zeroPad[1],
width + zeroPad[2] + zeroPad[3]};
INDArray out = zpl.activate(input, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(expShape, out.shape());
String msg = "minibatch=" + minibatchSize + ", channels=" + inputDepth + ", zeroPad = "
+ Arrays.toString(zeroPad);
String msg = "minibatch=" + minibatchSize + ", channels=" + inputDepth + ", zeroPad = "
+ Arrays.toString(zeroPad);
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
@ -896,12 +873,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
public void testDeconvolution2D() {
int nOut = 2;
int[] minibatchSizes = new int[]{1, 3, 1, 3, 1, 3, 1, 3};
int[] kernelSizes = new int[]{1, 1, 3, 3, 1, 1, 3, 3};
int[] strides = {1, 1, 1, 1, 2, 2, 2, 2};
int[] dilation = {1, 2, 2, 1, 1, 1, 2, 2};
Activation[] activations = new Activation[]{Activation.SIGMOID, Activation.TANH, Activation.TANH, Activation.TANH, Activation.TANH, Activation.SIGMOID, Activation.SIGMOID, Activation.SIGMOID};
ConvolutionMode[] cModes = new ConvolutionMode[]{Same, Same, Same, Same, Truncate, Truncate, Truncate, Truncate};
int[] minibatchSizes = new int[]{1, 3, 3, 1, 3};
int[] kernelSizes = new int[]{1, 1, 1, 3, 3};
int[] strides = {1, 1, 2, 2, 2};
int[] dilation = {1, 2, 1, 2, 2};
Activation[] activations = new Activation[]{Activation.SIGMOID, Activation.TANH, Activation.SIGMOID, Activation.SIGMOID, Activation.SIGMOID};
ConvolutionMode[] cModes = new ConvolutionMode[]{Same, Same, Truncate, Truncate, Truncate};
int width = 7;
int height = 7;
int inputDepth = 3;
@ -954,7 +931,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 100);
assertTrue(msg, gradOK);
@ -967,21 +944,17 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int nOut = 2;
int[] minibatchSizes = new int[]{1, 3};
int width = 8;
int height = 8;
int width = 6;
int height = 6;
int inputDepth = 3;
int[] kernelSizes = new int[]{2, 3};
int[] strides = {1, 2};
int[] dilation = {1, 2};
ConvolutionMode[] cModes = new ConvolutionMode[]{ConvolutionMode.Truncate};
Nd4j.getRandom().setSeed(12345);
int[] ks = new int[]{1, 3, 1, 3, 1, 3, 1, 3};
int[] ss = new int[]{1, 1, 2, 2, 1, 1, 2, 2};
int[] ds = new int[]{1, 1, 1, 1, 2, 2, 2, 2};
ConvolutionMode[] cms = new ConvolutionMode[]{Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1, 1, 3, 3, 3, 1, 3, 3};
int[] ks = new int[]{1, 3, 3, 1, 3};
int[] ss = new int[]{1, 1, 1, 2, 2};
int[] ds = new int[]{1, 1, 2, 2, 2};
ConvolutionMode[] cms = new ConvolutionMode[]{Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1, 1, 1, 3, 3};
for (int t = 0; t < ks.length; t++) {
@ -1030,7 +1003,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 50); //Most params are in output layer
assertTrue(msg, gradOK);
@ -1042,18 +1015,18 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
public void testCnnDilated() {
int nOut = 2;
int minibatchSize = 3;
int minibatchSize = 2;
int width = 8;
int height = 8;
int inputDepth = 3;
int inputDepth = 2;
Nd4j.getRandom().setSeed(12345);
boolean[] sub = new boolean[]{true, false, true, false, true, false, true, false};
int[] stride = new int[]{1, 1, 2, 2, 1, 1, 2, 2};
int[] kernel = new int[]{2, 2, 2, 2, 3, 3, 3, 3};
int[] ds = new int[]{2, 3, 3, 2, 2, 3, 3, 2};
ConvolutionMode[] cms = new ConvolutionMode[]{Same, Same, Same, Truncate, Truncate, Truncate, Same, Truncate};
boolean[] sub = new boolean[]{true, true, false, true, false};
int[] stride = new int[]{1, 1, 1, 2, 2};
int[] kernel = new int[]{2, 3, 3, 3, 3};
int[] ds = new int[]{2, 2, 3, 3, 2};
ConvolutionMode[] cms = new ConvolutionMode[]{Same, Truncate, Truncate, Same, Truncate};
for (int t = 0; t < sub.length; t++) {
@ -1126,7 +1099,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
@Test
public void testCropping2DLayer() {
Nd4j.getRandom().setSeed(12345);
int nOut = 4;
int nOut = 2;
int[] minibatchSizes = {1, 3};
int width = 12;
@ -1155,11 +1128,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.convolutionMode(ConvolutionMode.Same)
.weightInit(new NormalDistribution(0, 1)).list()
.layer(new ConvolutionLayer.Builder(kernel, stride, padding)
.nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5
.nIn(inputDepth).nOut(2).build())//output: (6-2+0)/1+1 = 5
.layer(new Cropping2D(crop))
.layer(new ConvolutionLayer.Builder(kernel, stride, padding).nIn(3).nOut(3).build())
.layer(new ConvolutionLayer.Builder(kernel, stride, padding).nIn(2).nOut(2).build())
.layer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG).kernelSize(3, 3).stride(3, 3).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(4).build())
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(height, width, inputDepth))
.build();
@ -1184,7 +1158,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 160);
assertTrue(msg, gradOK);
@ -1200,16 +1174,16 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int depthMultiplier = 2;
int nOut = nIn * depthMultiplier;
int width = 8;
int height = 8;
int width = 5;
int height = 5;
Nd4j.getRandom().setSeed(12345);
int[] ks = new int[]{1,3,1,3,1,3,1,3};
int[] ss = new int[]{1,1,2,2,1,1,2,2};
int[] ks = new int[]{1,3,3,1,3};
int[] ss = new int[]{1,1,1,2,2};
ConvolutionMode[] cms = new ConvolutionMode[]{
Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1,1,3,3,3,1,3,3};
Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1,1,1,3,3};
for( int t=0; t<ks.length; t++ ){
@ -1255,7 +1229,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 256);
assertTrue(msg, gradOK);

View File

@ -39,6 +39,8 @@ import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood;
import java.util.Random;
public class CapsnetGradientCheckTest extends BaseDL4JTest {
private static final boolean PRINT_RESULTS = true;
@ -70,6 +72,7 @@ public class CapsnetGradientCheckTest extends BaseDL4JTest {
for (int capsule : capsules) {
for (int capsuleDim : capsuleDims) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, inputDepth * height * width).mul(10)
.reshape(-1, inputDepth, height, width);
INDArray labels = Nd4j.zeros(minibatchSize, capsule);
@ -110,7 +113,7 @@ public class CapsnetGradientCheckTest extends BaseDL4JTest {
boolean gradOK = GradientCheckUtil
.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
labels);
labels, null, null, true, 100);
assertTrue(msg, gradOK);

View File

@ -100,15 +100,15 @@ public class DropoutGradientCheck extends BaseDL4JTest {
.list();
if(cnn){
builder.layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(1,1).nOut(3).build());
builder.layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(1,1).nOut(3).build());
builder.setInputType(InputType.convolutional(8,8,3));
builder.layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(2,2).nOut(2).build());
builder.layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(2,2).nOut(2).build());
builder.setInputType(InputType.convolutional(6,6,2));
} else {
builder.layer(new DenseLayer.Builder().nOut(12).build());
builder.layer(new DenseLayer.Builder().nOut(12).build());
builder.setInputType(InputType.feedForward(8));
builder.layer(new DenseLayer.Builder().nOut(3).build());
builder.layer(new DenseLayer.Builder().nOut(3).build());
builder.setInputType(InputType.feedForward(6));
}
builder.layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunction.MCXENT).build());
builder.layer(new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunction.MCXENT).build());
MultiLayerConfiguration conf = builder.build();
//Remove spatial dropout from output layer - can't be used for 2d input
@ -123,11 +123,11 @@ public class DropoutGradientCheck extends BaseDL4JTest {
INDArray f;
if(cnn){
f = Nd4j.rand(new int[]{minibatch, 3, 8, 8}).muli(10).subi(5);
f = Nd4j.rand(new int[]{minibatch, 2, 6, 6}).muli(10).subi(5);
} else {
f = Nd4j.rand(minibatch, 8).muli(10).subi(5);
f = Nd4j.rand(minibatch, 6).muli(10).subi(5);
}
INDArray l = TestUtils.randomOneHot(minibatch, 10);
INDArray l = TestUtils.randomOneHot(minibatch, 3);
log.info("*** Starting test: " + msg + " ***");
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,

View File

@ -24,6 +24,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
@ -53,11 +54,11 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
@Test
public void testLSTMGlobalPoolingBasicMultiLayer() {
public void testRNNGlobalPoolingBasicMultiLayer() {
//Basic test of global pooling w/ LSTM
Nd4j.getRandom().setSeed(12345L);
int timeSeriesLength = 10;
int timeSeriesLength = 5;
int nIn = 5;
int layerSize = 4;
int nOut = 2;
@ -73,7 +74,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.layer(0, new SimpleRnn.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.build())
.layer(1, new GlobalPoolingLayer.Builder().poolingType(pt).build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
@ -84,20 +85,9 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
mln.init();
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] {i, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize, nIn, timeSeriesLength).subi(0.5);
INDArray labels = Nd4j.zeros(miniBatchSize, nOut);
for (int i = 0; i < miniBatchSize; i++) {
int idx = r.nextInt(nOut);
labels.putScalar(i, idx, 1.0);
}
INDArray labels = TestUtils.randomOneHot(miniBatchSize, nOut).castTo(DataType.DOUBLE);
if (PRINT_RESULTS) {
System.out.println("testLSTMGlobalPoolingBasicMultiLayer() - " + pt + ", minibatch = "
@ -175,12 +165,12 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
@Test
public void testLSTMWithMasking() {
//Basic test of GravesLSTM layer
//Basic test of LSTM layer
Nd4j.getRandom().setSeed(12345L);
int timeSeriesLength = 10;
int nIn = 5;
int layerSize = 4;
int timeSeriesLength = 5;
int nIn = 4;
int layerSize = 3;
int nOut = 2;
int miniBatchSize = 3;
@ -193,7 +183,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.layer(0, new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.build())
.layer(1, new GlobalPoolingLayer.Builder().poolingType(pt).build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
@ -204,14 +194,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
mln.init();
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] {i, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize, nIn, timeSeriesLength).subi(0.5);
INDArray featuresMask = Nd4j.create(miniBatchSize, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
@ -221,12 +204,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
}
}
INDArray labels = Nd4j.zeros(miniBatchSize, nOut);
for (int i = 0; i < miniBatchSize; i++) {
int idx = r.nextInt(nOut);
labels.putScalar(i, idx, 1.0);
}
INDArray labels = TestUtils.randomOneHot(miniBatchSize, nOut);
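Many hunks in this commit replace hand-rolled putScalar loops with Nd4j.rand plus the TestUtils.randomOneHot and randomOneHotTimeSeries helpers, as in the labels line above. A rough sketch of what such a time-series helper could look like follows; this is a hypothetical illustration, and the repository's actual TestUtils may differ in RNG handling and data types:

    // Hypothetical equivalent of the label-generation loops removed in this commit:
    // one randomly chosen class per (example, time step), one-hot encoded in a rank-3 array.
    public static INDArray randomOneHotTimeSeries(int minibatch, int nOut, int tsLength) {
        java.util.Random r = new java.util.Random(12345);
        INDArray labels = Nd4j.zeros(DataType.DOUBLE, minibatch, nOut, tsLength);
        for (int i = 0; i < minibatch; i++) {
            for (int j = 0; j < tsLength; j++) {
                labels.putScalar(new int[] {i, r.nextInt(nOut), j}, 1.0);
            }
        }
        return labels;
    }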
mln.setLayerMaskArrays(featuresMask, null);
if (PRINT_RESULTS) {

View File

@ -32,6 +32,7 @@ import org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex;
import org.deeplearning4j.nn.conf.graph.rnn.ReverseTimeSeriesVertex;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor;
@ -334,7 +335,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
}
@Test
public void testLSTMWithMerging() {
public void testRNNWithMerging() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf =
@ -345,23 +346,23 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
.updater(new NoOp()).graphBuilder().addInputs("input")
.setOutputs("out")
.addLayer("lstm1",
new GravesLSTM.Builder().nIn(3).nOut(4)
new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(),
"input")
.addLayer("lstm2",
new GravesLSTM.Builder().nIn(4).nOut(4)
new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(),
"lstm1")
.addLayer("dense1",
new DenseLayer.Builder().nIn(4).nOut(4)
new DenseLayer.Builder().nIn(3).nOut(3)
.activation(Activation.SIGMOID).build(),
"lstm1")
.addLayer("lstm3",
new GravesLSTM.Builder().nIn(4).nOut(4)
new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(),
"dense1")
.addVertex("merge", new MergeVertex(), "lstm2", "lstm3")
.addLayer("out", new RnnOutputLayer.Builder().nIn(8).nOut(3)
.addLayer("out", new RnnOutputLayer.Builder().nIn(6).nOut(3)
.activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(),
"merge")
@ -373,13 +374,8 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
graph.init();
Random r = new Random(12345);
INDArray input = Nd4j.rand(new int[] {3, 3, 5});
INDArray labels = Nd4j.zeros(3, 3, 5);
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 5; j++) {
labels.putScalar(new int[] {i, r.nextInt(3), j}, 1.0);
}
}
INDArray input = Nd4j.rand(new int[] {2, 3, 4});
INDArray labels = TestUtils.randomOneHotTimeSeries(2, 3, 4);
if (PRINT_RESULTS) {
System.out.println("testLSTMWithMerging()");
@ -401,13 +397,12 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
Nd4j.getRandom().setSeed(1234);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(1234)
.dataType(DataType.DOUBLE)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.dist(new NormalDistribution(0, 1))
.weightInit(new NormalDistribution(0, 1))
.updater(new NoOp()).graphBuilder().addInputs("input").setOutputs("out")
.addLayer("lstm1", new GravesLSTM.Builder().nIn(3).nOut(8).activation(Activation.TANH).build(),
.addLayer("lstm1", new LSTM.Builder().nIn(3).nOut(6).activation(Activation.TANH).build(),
"input")
.addVertex("subset", new SubsetVertex(0, 3), "lstm1")
.addLayer("out", new RnnOutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX)
.addVertex("subset", new SubsetVertex(0, 2), "lstm1")
.addLayer("out", new RnnOutputLayer.Builder().nIn(3).nOut(2).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "subset")
.build();
@ -415,13 +410,8 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
graph.init();
Random r = new Random(12345);
INDArray input = Nd4j.rand(new int[] {3, 3, 5});
INDArray labels = Nd4j.zeros(3, 3, 5);
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 5; j++) {
labels.putScalar(new int[] {i, r.nextInt(3), j}, 1.0);
}
}
INDArray input = Nd4j.rand(new int[] {2, 3, 4});
INDArray labels = TestUtils.randomOneHotTimeSeries(2, 2, 4);
if (PRINT_RESULTS) {
System.out.println("testLSTMWithSubset()");
@ -447,10 +437,10 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.dist(new NormalDistribution(0, 1))
.updater(new NoOp()).graphBuilder().addInputs("input").setOutputs("out")
.addLayer("lstm1", new GravesLSTM.Builder().nIn(3).nOut(4).activation(Activation.TANH).build(),
.addLayer("lstm1", new LSTM.Builder().nIn(3).nOut(4).activation(Activation.TANH).build(),
"input")
.addVertex("lastTS", new LastTimeStepVertex("input"), "lstm1")
.addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX)
.addLayer("out", new OutputLayer.Builder().nIn(4).nOut(2).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "lastTS")
.build();
@ -458,11 +448,8 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
graph.init();
Random r = new Random(12345);
INDArray input = Nd4j.rand(new int[] {3, 3, 5});
INDArray labels = Nd4j.zeros(3, 3); //Here: labels are 2d (due to LastTimeStepVertex)
for (int i = 0; i < 3; i++) {
labels.putScalar(new int[] {i, r.nextInt(3)}, 1.0);
}
INDArray input = Nd4j.rand(new int[] {2, 3, 4});
INDArray labels = TestUtils.randomOneHot(2, 2); //Here: labels are 2d (due to LastTimeStepVertex)
if (PRINT_RESULTS) {
System.out.println("testLSTMWithLastTimeStepVertex()");
@ -503,16 +490,16 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
.updater(new NoOp()).graphBuilder()
.addInputs("input1", "input2").setOutputs("out")
.addLayer("lstm1",
new GravesLSTM.Builder().nIn(3).nOut(4)
new LSTM.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(),
"input1")
.addLayer("lstm2",
new GravesLSTM.Builder().nIn(4).nOut(5)
new LSTM.Builder().nIn(2).nOut(4)
.activation(Activation.SOFTSIGN).build(),
"input2")
.addVertex("lastTS", new LastTimeStepVertex("input2"), "lstm2")
.addVertex("duplicate", new DuplicateToTimeSeriesVertex("input2"), "lastTS")
.addLayer("out", new RnnOutputLayer.Builder().nIn(5 + 4).nOut(3)
.addLayer("out", new RnnOutputLayer.Builder().nIn(3+4).nOut(2)
.activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(),
"lstm1", "duplicate")
@ -522,14 +509,9 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
graph.init();
Random r = new Random(12345);
INDArray input1 = Nd4j.rand(new int[] {3, 3, 5});
INDArray input2 = Nd4j.rand(new int[] {3, 4, 5});
INDArray labels = Nd4j.zeros(3, 3, 5);
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 5; j++) {
labels.putScalar(new int[] {i, r.nextInt(3), j}, 1.0);
}
}
INDArray input1 = Nd4j.rand(new int[] {2, 3, 4});
INDArray input2 = Nd4j.rand(new int[] {2, 2, 4});
INDArray labels = TestUtils.randomOneHotTimeSeries(2, 2, 4);
if (PRINT_RESULTS) {
System.out.println("testLSTMWithDuplicateToTimeSeries()");
@ -558,16 +540,16 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
.updater(new NoOp()).graphBuilder()
.addInputs("input").setOutputs("out")
.addLayer("lstm_a",
new GravesLSTM.Builder().nIn(3).nOut(4)
new LSTM.Builder().nIn(2).nOut(3)
.activation(Activation.TANH).build(),
"input")
.addVertex("input_rev", new ReverseTimeSeriesVertex("input"), "input")
.addLayer("lstm_b",
new GravesLSTM.Builder().nIn(3).nOut(4)
new LSTM.Builder().nIn(2).nOut(3)
.activation(Activation.TANH).build(),
"input_rev")
.addVertex("lstm_b_rev", new ReverseTimeSeriesVertex("input"), "lstm_b")
.addLayer("out", new RnnOutputLayer.Builder().nIn(4 + 4).nOut(3)
.addLayer("out", new RnnOutputLayer.Builder().nIn(3 + 3).nOut(2)
.activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(),
"lstm_a", "lstm_b_rev")
@ -577,13 +559,8 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
graph.init();
Random r = new Random(12345);
INDArray input = Nd4j.rand(new int[] {3, 3, 5});
INDArray labels = Nd4j.zeros(3, 3, 5);
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 5; j++) {
labels.putScalar(new int[] {i, r.nextInt(3), j}, 1.0);
}
}
INDArray input = Nd4j.rand(new int[] {2, 2, 4});
INDArray labels = TestUtils.randomOneHotTimeSeries(2, 2, 4);
if (PRINT_RESULTS) {
System.out.println("testLSTMWithReverseTimeSeriesVertex()");
@ -1171,10 +1148,10 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
.dist(new NormalDistribution(0, 1))
.activation(Activation.TANH).updater(new NoOp()).graphBuilder()
.addInputs("in1", "in2")
.addLayer("d0", new GravesLSTM.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in1")
.addLayer("d1", new GravesLSTM.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in2")
.addLayer("d0", new SimpleRnn.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in1")
.addLayer("d1", new SimpleRnn.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in2")
.addVertex("stack", new StackVertex(), "d0", "d1")
.addLayer("d2", new GravesLSTM.Builder().nIn(layerSizes).nOut(layerSizes).build(), "stack")
.addLayer("d2", new SimpleRnn.Builder().nIn(layerSizes).nOut(layerSizes).build(), "stack")
.addVertex("u1", new UnstackVertex(0, 2), "d2").addVertex("u2", new UnstackVertex(1, 2), "d2")
.addLayer("p1", new GlobalPoolingLayer.Builder(PoolingType.AVG).build(), "u1")
.addLayer("p2", new GlobalPoolingLayer.Builder(PoolingType.AVG).build(), "u2")
@ -1193,7 +1170,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
INDArray newParams = Nd4j.rand(new long[]{1, nParams});
graph.setParams(newParams);
int[] mbSizes = new int[] {1, 3, 10};
int[] mbSizes = new int[] {1, 2, 3};
for (int minibatch : mbSizes) {
INDArray in1 = Nd4j.rand(new int[] {minibatch, layerSizes, 4});

View File

@ -25,6 +25,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test;
@ -85,7 +86,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
mask[3] = new boolean[] {false, false, true, false, true}; //time series classification w/ variable length TS
mask[4] = new boolean[] {true, true, true, false, true}; //variable length TS
int nIn = 4;
int nIn = 3;
int layerSize = 3;
GradientCheckSimpleScenario[] scenarios = new GradientCheckSimpleScenario[] {
@ -94,23 +95,14 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),
Activation.TANH, 10, 3),
new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(4).build(),
Activation.IDENTITY, 12, 4),
new GradientCheckSimpleScenario(LossFunctions.LossFunction.L2.getILossFunction(),
Activation.SOFTMAX, 2, 2)};
Activation.IDENTITY, 12, 4)};
for (GradientCheckSimpleScenario s : scenarios) {
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(1, nIn, timeSeriesLength);
for (int m = 0; m < 1; m++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] {m, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray input = Nd4j.rand(DataType.DOUBLE, 1, nIn, timeSeriesLength).subi(0.5);
INDArray labels = Nd4j.zeros(1, s.labelWidth, timeSeriesLength);
INDArray labels = Nd4j.zeros(DataType.DOUBLE, 1, s.labelWidth, timeSeriesLength);
for (int m = 0; m < 1; m++) {
for (int j = 0; j < timeSeriesLength; j++) {
int idx = r.nextInt(s.labelWidth);
@ -127,15 +119,14 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L)
.dataType(DataType.DOUBLE)
.list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
.dist(new NormalDistribution(0, 1))
.updater(new NoOp()).build())
.layer(1, new RnnOutputLayer.Builder(s.lf).activation(s.act).nIn(layerSize).nOut(s.nOut)
.dist(new NormalDistribution(0, 1))
.updater(new NoOp()).build())
.build();
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.list()
.layer(0, new SimpleRnn.Builder().nIn(nIn).nOut(layerSize)
.weightInit(new NormalDistribution(0, 1)).build())
.layer(1, new RnnOutputLayer.Builder(s.lf).activation(s.act).nIn(layerSize).nOut(s.nOut)
.weightInit(new NormalDistribution(0, 1)).build())
.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
@ -156,15 +147,14 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
int timeSeriesLength = 5;
int nIn = 5;
int layerSize = 4;
int layerSize = 3;
int nOut = 3;
int miniBatchSize = 3;
int miniBatchSize = 2;
INDArray[] masks = new INDArray[] {null,
Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}),
Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 0}, {1, 1, 1, 0, 0}}),
Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {0, 1, 1, 1, 1}, {0, 0, 1, 1, 1}})};
INDArray[] masks = new INDArray[] {
Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {1, 1, 1, 0, 0}}),
Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {0, 1, 1, 1, 1}})};
int testNum = 0;
for (INDArray mask : masks) {
@ -201,7 +191,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, mask, mask);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, mask, mask, true, 16);
assertTrue(gradOK);
TestUtils.testModelSerialization(mln);
@ -295,9 +285,9 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
//For RNNs: per-output masking uses 3d masks (same shape as output/labels), as compared to the standard
// 2d masks (used for per *example* masking)
int nIn = 4;
int layerSize = 4;
int nOut = 4;
int nIn = 3;
int layerSize = 3;
int nOut = 2;
//1 example, TS length 3
INDArray mask1 = Nd4j.create(new double[] {1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0}, new int[] {1, nOut, 3}, 'f');
@ -358,7 +348,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
.dataType(DataType.DOUBLE)
.dist(new NormalDistribution(0, 1)).seed(12345)
.list()
.layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.layer(0, new SimpleRnn.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH)
.build())
.layer(1, new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf)
.activation(a).build())
@ -390,7 +380,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest {
.dataType(DataType.DOUBLE)
.dist(new NormalDistribution(0, 2)).seed(12345)
.graphBuilder().addInputs("in")
.addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
.addLayer("0", new SimpleRnn.Builder().nIn(nIn).nOut(layerSize)
.activation(Activation.TANH).build(), "in")
.addLayer("1", new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf)
.activation(a).build(), "0")

View File

@ -139,11 +139,11 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
@Test
public void testGradientLSTMFull() {
int timeSeriesLength = 8;
int nIn = 7;
int layerSize = 9;
int nOut = 4;
int miniBatchSize = 6;
int timeSeriesLength = 4;
int nIn = 3;
int layerSize = 4;
int nOut = 2;
int miniBatchSize = 2;
boolean[] gravesLSTM = new boolean[] {true, false};
@ -162,13 +162,13 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
//use l2vals[i] with l1vals[i]
double[] l2vals = {0.4, 0.0, 0.4, 0.4};
double[] l1vals = {0.0, 0.0, 0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
Activation[] activFns = {Activation.TANH, Activation.SOFTSIGN, Activation.TANH, Activation.TANH};
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE, LossFunction.MSE, LossFunction.MCXENT};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.IDENTITY, Activation.SOFTMAX};
double[] l2vals = {0.4, 0.0};
double[] l1vals = {0.0, 0.5};
double[] biasL2 = {0.3, 0.0};
double[] biasL1 = {0.0, 0.6};
Activation[] activFns = {Activation.TANH, Activation.SOFTSIGN};
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH};
for (int i = 0; i < l2vals.length; i++) {
@ -218,7 +218,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 128);
assertTrue(testName, gradOK);
TestUtils.testModelSerialization(mln);
@ -233,9 +233,9 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
int[] timeSeriesLength = {1, 5, 1};
int[] miniBatchSize = {7, 1, 1};
int nIn = 7;
int layerSize = 9;
int nOut = 4;
int nIn = 3;
int layerSize = 4;
int nOut = 2;
boolean[] gravesLSTM = new boolean[] {true, false};
@ -244,22 +244,9 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
for (int i = 0; i < timeSeriesLength.length; i++) {
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize[i], nIn, timeSeriesLength[i]);
for (int m = 0; m < miniBatchSize[i]; m++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength[i]; k++) {
input.putScalar(new int[] {m, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize[i], nIn, timeSeriesLength[i]);
INDArray labels = Nd4j.zeros(miniBatchSize[i], nOut, timeSeriesLength[i]);
for (int m = 0; m < miniBatchSize[i]; m++) {
for (int j = 0; j < timeSeriesLength[i]; j++) {
int idx = r.nextInt(nOut);
labels.putScalar(new int[] {m, idx, j}, 1.0f);
}
}
INDArray labels = TestUtils.randomOneHotTimeSeries(miniBatchSize[i], nOut, timeSeriesLength[i]);
Layer layer;
if (graves) {
@ -296,91 +283,75 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
int timeSeriesLength = 4;
int timeSeriesLength = 3;
int nIn = 2;
int layerSize = 2;
int nOut = 2;
int miniBatchSize = 3;
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] {i, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < timeSeriesLength; j++) {
int idx = r.nextInt(nOut);
labels.putScalar(new int[] {i, idx, j}, 1.0f);
}
}
INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize, nIn, timeSeriesLength).subi(0.5);
INDArray labels = TestUtils.randomOneHotTimeSeries(miniBatchSize, nOut, timeSeriesLength);
//use l2vals[i] with l1vals[i]
double[] l2vals = {0.4, 0.0, 0.4, 0.4};
double[] l1vals = {0.0, 0.0, 0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
double[] l2vals = {0.4, 0.0};
double[] l1vals = {0.5, 0.0};
double[] biasL2 = {0.0, 0.2};
double[] biasL1 = {0.0, 0.6};
for (Activation afn : activFns) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
Activation afn = activFns[i];
LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
NeuralNetConfiguration.Builder conf =
new NeuralNetConfiguration.Builder();
if (l1 > 0.0)
conf.l1(l1);
if (l2 > 0.0)
conf.l2(l2);
if (biasL2[k] > 0)
conf.l2Bias(biasL2[k]);
if (biasL1[k] > 0)
conf.l1Bias(biasL1[k]);
NeuralNetConfiguration.Builder conf =
new NeuralNetConfiguration.Builder();
if (l1 > 0.0)
conf.l1(l1);
if (l2 > 0.0)
conf.l2(l2);
if (biasL2[k] > 0)
conf.l2Bias(biasL2[k]);
if (biasL1[k] > 0)
conf.l1Bias(biasL1[k]);
MultiLayerConfiguration mlc = conf.seed(12345L)
.dataType(DataType.DOUBLE)
.list().layer(0,
new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
.dist(new NormalDistribution(0, 1))
.activation(afn).updater(
Updater.NONE)
.build())
.layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize)
.nOut(nOut)
.dist(new NormalDistribution(0, 1)).updater(new NoOp()).build())
.build();
MultiLayerConfiguration mlc = conf.seed(12345L)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.list().layer(0,
new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
.weightInit(new NormalDistribution(0, 1))
.activation(afn)
.build())
.layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize)
.nOut(nOut)
.dist(new NormalDistribution(0, 1)).updater(new NoOp()).build())
.build();
MultiLayerNetwork mln = new MultiLayerNetwork(mlc);
MultiLayerNetwork mln = new MultiLayerNetwork(mlc);
mln.init();
mln.init();
if (PRINT_RESULTS) {
System.out.println("testGradientGravesBidirectionalLSTMFull() - activationFn=" + afn
+ ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2
+ ", l1=" + l1);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
String msg = "testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(mln);
if (PRINT_RESULTS) {
System.out.println("testGradientGravesBidirectionalLSTMFull() - activationFn=" + afn
+ ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2
+ ", l1=" + l1);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
String msg = "testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(mln);
}
}
}
@ -391,21 +362,14 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
int[] timeSeriesLength = {1, 5, 1};
int[] miniBatchSize = {7, 1, 1};
int nIn = 7;
int layerSize = 9;
int nOut = 4;
int nIn = 3;
int layerSize = 4;
int nOut = 2;
for (int i = 0; i < timeSeriesLength.length; i++) {
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize[i], nIn, timeSeriesLength[i]);
for (int m = 0; m < miniBatchSize[i]; m++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength[i]; k++) {
input.putScalar(new int[] {m, j, k}, r.nextDouble() - 0.5);
}
}
}
INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize[i], nIn, timeSeriesLength[i]).subi(0.5);
INDArray labels = Nd4j.zeros(miniBatchSize[i], nOut, timeSeriesLength[i]);
for (int m = 0; m < miniBatchSize[i]; m++) {
@ -431,7 +395,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
mln.init();
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 128);
String msg = "testGradientGravesLSTMEdgeCases() - timeSeriesLength=" + timeSeriesLength[i]
+ ", miniBatchSize=" + miniBatchSize[i];
@ -445,11 +409,11 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
//Test gradients with CNN -> FF -> LSTM -> RnnOutputLayer
//time series input/output (i.e., video classification or similar)
int nChannelsIn = 3;
int inputSize = 10 * 10 * nChannelsIn; //10px x 10px x 3 channels
int miniBatchSize = 4;
int timeSeriesLength = 10;
int nClasses = 3;
int nChannelsIn = 2;
int inputSize = 6 * 6 * nChannelsIn; //6px x 6px x 2 channels
int miniBatchSize = 2;
int timeSeriesLength = 4;
int nClasses = 2;
//Generate
Nd4j.getRandom().setSeed(12345);
@ -467,18 +431,18 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()).seed(12345)
.dataType(DataType.DOUBLE)
.dist(new UniformDistribution(-2, 2)).list()
.layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).nOut(5).stride(1, 1)
.layer(0, new ConvolutionLayer.Builder(3, 3).nIn(2).nOut(3).stride(1, 1)
.activation(Activation.TANH).build()) //Out: (10-5)/1+1 = 6 -> 6x6x5
.layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
.stride(1, 1).build()) //Out: (6-2)/1+1 = 5 -> 5x5x5
.layer(2, new DenseLayer.Builder().nIn(5 * 5 * 5).nOut(4).activation(Activation.TANH).build())
.layer(2, new DenseLayer.Builder().nIn(27).nOut(4).activation(Activation.TANH).build())
.layer(3, new GravesLSTM.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
.layer(4, new RnnOutputLayer.Builder().lossFunction(LossFunction.MCXENT).nIn(3).nOut(nClasses)
.activation(Activation.SOFTMAX).build())
.setInputType(InputType.convolutional(10, 10, 3)).build();
.setInputType(InputType.convolutional(6, 6, 2)).build();
//Here: ConvolutionLayerSetup in config builder doesn't know that we are expecting time series input, not standard FF input -> override it here
conf.getInputPreProcessors().put(0, new RnnToCnnPreProcessor(10, 10, 3));
conf.getInputPreProcessors().put(0, new RnnToCnnPreProcessor(6, 6, 2));
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
@ -489,7 +453,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest {
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 32);
assertTrue(gradOK);
TestUtils.testModelSerialization(mln);
}

View File

@ -68,6 +68,8 @@ public class RnnGradientChecks extends BaseDL4JTest {
for (boolean inputMask : new boolean[]{false, true}) {
for (boolean simple : new boolean[]{false, true}) {
for(boolean hasLayerNorm: new boolean[]{true, false}) {
if(!simple && hasLayerNorm)
continue;
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
INDArray labels = Nd4j.create(mb, nOut, tsLength);
@ -93,6 +95,11 @@ public class RnnGradientChecks extends BaseDL4JTest {
}
for (Bidirectional.Mode m : modes) {
//Skip 3 of 4 test cases: from 64 down to 16, which should still give good coverage
//Note RNG seed - deterministic run-to-run
if(r.nextInt(4) != 0)
continue;
String name = "mb=" + mb + ", maskType=" + maskType + ", mode=" + m + ", hasLayerNorm=" + hasLayerNorm + ", rnnType="
+ (simple ? "SimpleRnn" : "LSTM");
@ -144,6 +151,9 @@ public class RnnGradientChecks extends BaseDL4JTest {
for (boolean inputMask : new boolean[]{false, true}) {
for (boolean hasLayerNorm : new boolean[]{true, false}) {
for (int l = 0; l < l1s.length; l++) {
//Only run 1 of 5 (on average - note RNG seed for deterministic testing) - 25 of 128 test cases (to minimize test time)
if(r.nextInt(5) != 0)
continue;
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
INDArray labels = Nd4j.create(mb, nOut, tsLength);
@ -217,6 +227,8 @@ public class RnnGradientChecks extends BaseDL4JTest {
for (boolean inputMask : new boolean[]{false, true}) {
for (boolean simple : new boolean[]{false, true}) {
for (boolean hasLayerNorm : new boolean[]{true, false}) {
if(!simple && hasLayerNorm)
continue;
INDArray in = Nd4j.rand(new int[]{mb, nIn, tsLength});
@ -265,7 +277,7 @@ public class RnnGradientChecks extends BaseDL4JTest {
net.init();
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, in, labels, inMask, null, true, 16);
assertTrue(name, gradOK);
TestUtils.testModelSerialization(net);
}
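The RnnGradientChecks changes above cut runtime in a different way: rather than shrinking layer sizes, they skip most of the Cartesian product of test parameters with a seeded RNG, so the same subset of combinations runs on every build. A minimal sketch of that pattern, assuming a seeded java.util.Random named r as in the surrounding tests, illustrative loop values, and a hypothetical runGradientCheck helper standing in for the real test body:

    Random r = new Random(12345);                 // fixed seed, so the selected subset is deterministic run-to-run
    for (int mb : new int[]{1, 3}) {              // illustrative values
        for (boolean inputMask : new boolean[]{false, true}) {
            for (Bidirectional.Mode m : modes) {  // modes assumed defined earlier in the test
                if (r.nextInt(4) != 0)
                    continue;                     // keep roughly 1 in 4 combinations, skip the rest
                runGradientCheck(mb, inputMask, m);  // hypothetical helper for the test body
            }
        }
    }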

View File

@ -26,6 +26,7 @@ import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.conf.layers.util.MaskLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@ -60,9 +61,9 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
@Test
public void testMaskLayer() {
Nd4j.getRandom().setSeed(12345);
int tsLength = 5;
int tsLength = 3;
for(int minibatch : new int[]{1,8}) {
for(int minibatch : new int[]{1,3}) {
for (int inputRank : new int[]{2, 3, 4}) {
for (boolean inputMask : new boolean[]{false, true}) {
String maskType = (inputMask ? "inputMask" : "none");
@ -74,7 +75,7 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
if(minibatch == 1){
inMask = Nd4j.ones(1,1);
} else {
inMask = Nd4j.create(minibatch, 1);
inMask = Nd4j.create(DataType.DOUBLE, minibatch, 1);
Nd4j.getExecutioner().exec(new BernoulliDistribution(inMask, 0.5));
int count = inMask.sumNumber().intValue();
assertTrue(count >= 0 && count <= minibatch); //Sanity check on RNG seed
@ -83,16 +84,16 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
case 4:
//Per-example mask (broadcast along all channels/x/y)
if(minibatch == 1){
inMask = Nd4j.ones(1,1, 1, 1);
inMask = Nd4j.ones(DataType.DOUBLE, 1,1, 1, 1);
} else {
inMask = Nd4j.create(minibatch, 1, 1, 1);
inMask = Nd4j.create(DataType.DOUBLE, minibatch, 1, 1, 1);
Nd4j.getExecutioner().exec(new BernoulliDistribution(inMask, 0.5));
int count = inMask.sumNumber().intValue();
assertTrue(count >= 0 && count <= minibatch); //Sanity check on RNG seed
}
break;
case 3:
inMask = Nd4j.ones(minibatch, tsLength);
inMask = Nd4j.ones(DataType.DOUBLE, minibatch, tsLength);
for( int i=0; i<minibatch; i++ ){
for( int j=i+1; j<tsLength; j++ ){
inMask.putScalar(i,j,0.0);
@ -108,11 +109,11 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
int[] labelShape;
switch (inputRank){
case 2:
inShape = new int[]{minibatch, 5};
inShape = new int[]{minibatch, 3};
labelShape = inShape;
break;
case 3:
inShape = new int[]{minibatch, 5, tsLength};
inShape = new int[]{minibatch, 3, tsLength};
labelShape = inShape;
break;
case 4:
@ -134,18 +135,18 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
InputType it;
switch (inputRank){
case 2:
l1 = new DenseLayer.Builder().nOut(5).build();
l2 = new DenseLayer.Builder().nOut(5).build();
l3 = new OutputLayer.Builder().nOut(5).lossFunction(LossFunctions.LossFunction.MSE)
l1 = new DenseLayer.Builder().nOut(3).build();
l2 = new DenseLayer.Builder().nOut(3).build();
l3 = new OutputLayer.Builder().nOut(3).lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.TANH).build();
it = InputType.feedForward(5);
it = InputType.feedForward(3);
break;
case 3:
l1 = new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build();
l2 = new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build();
l3 = new RnnOutputLayer.Builder().nIn(5).nOut(5).lossFunction(LossFunctions.LossFunction.SQUARED_LOSS)
l1 = new SimpleRnn.Builder().nIn(3).nOut(3).activation(Activation.TANH).build();
l2 = new SimpleRnn.Builder().nIn(3).nOut(3).activation(Activation.TANH).build();
l3 = new RnnOutputLayer.Builder().nIn(3).nOut(3).lossFunction(LossFunctions.LossFunction.SQUARED_LOSS)
.activation(Activation.IDENTITY).build();
it = InputType.recurrent(5);
it = InputType.recurrent(3);
break;
case 4:
l1 = new ConvolutionLayer.Builder().nOut(5).convolutionMode(ConvolutionMode.Truncate)

View File

@ -138,28 +138,23 @@ public class VaeGradientCheckTests extends BaseDL4JTest {
@Test
public void testVaePretrain() {
Nd4j.getRandom().setSeed(12345);
Activation[] activFns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH};
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MCXENT, LossFunction.MSE, LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.SOFTMAX, Activation.TANH, Activation.TANH};
Activation[] pzxAfns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH};
Activation[] pxzAfns = {Activation.TANH, Activation.IDENTITY, Activation.TANH, Activation.TANH};
Activation[] activFns = {Activation.IDENTITY, Activation.TANH, Activation.SOFTSIGN};
Activation[] pzxAfns = {Activation.IDENTITY, Activation.IDENTITY, Activation.TANH};
Activation[] pxzAfns = {Activation.TANH, Activation.TANH, Activation.IDENTITY};
//use l2vals[i] with l1vals[i]
double[] l2vals = {0.4, 0.0, 0.4, 0.4};
double[] l1vals = {0.0, 0.0, 0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
double[] l2vals = {0.0, 0.4, 0.4};
double[] l1vals = {0.0, 0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.6, 0.0};
int[][] encoderLayerSizes = new int[][] {{5}, {5}, {5, 6}, {5, 6}};
int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}, {6}, {7, 8}};
int[][] encoderLayerSizes = new int[][] {{5}, {3, 4}, {3, 4}};
int[][] decoderLayerSizes = new int[][] {{4}, {2}, {4, 3}};
int[] minibatches = new int[]{1,5,4,3};
int[] minibatches = new int[]{1,3,2,3};
Nd4j.getRandom().setSeed(12345);
for( int i=0; i<activFns.length; i++ ){
LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
double l2 = l2vals[i];
double l1 = l1vals[i];
int[] encoderSizes = encoderLayerSizes[i];
@ -214,18 +209,18 @@ public class VaeGradientCheckTests extends BaseDL4JTest {
@Test
public void testVaePretrainReconstructionDistributions() {
int inOutSize = 6;
int inOutSize = 3;
ReconstructionDistribution[] reconstructionDistributions =
new ReconstructionDistribution[]{new GaussianReconstructionDistribution(Activation.IDENTITY),
new GaussianReconstructionDistribution(Activation.TANH),
new BernoulliReconstructionDistribution(Activation.SIGMOID),
new CompositeReconstructionDistribution.Builder()
.addDistribution(2,
.addDistribution(1,
new GaussianReconstructionDistribution(
Activation.IDENTITY))
.addDistribution(2, new BernoulliReconstructionDistribution())
.addDistribution(2,
.addDistribution(1, new BernoulliReconstructionDistribution())
.addDistribution(1,
new GaussianReconstructionDistribution(
Activation.TANH))
.build(),
@ -248,12 +243,12 @@ public class VaeGradientCheckTests extends BaseDL4JTest {
break;
case 3: //Composite
data = Nd4j.create(minibatch, inOutSize);
data.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 2)).assign(Nd4j.rand(minibatch, 2));
data.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 1)).assign(Nd4j.rand(minibatch, 1));
Nd4j.getExecutioner()
.exec(new BernoulliDistribution(
data.get(NDArrayIndex.all(), NDArrayIndex.interval(2, 4)), 0.5),
data.get(NDArrayIndex.all(), NDArrayIndex.interval(1, 2)), 0.5),
Nd4j.getRandom());
data.get(NDArrayIndex.all(), NDArrayIndex.interval(4, 6)).assign(Nd4j.rand(minibatch, 2));
data.get(NDArrayIndex.all(), NDArrayIndex.interval(2, 3)).assign(Nd4j.rand(minibatch, 1));
break;
case 4:
case 5:
@ -269,7 +264,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest {
.seed(12345L).dist(new NormalDistribution(0, 1))
.list().layer(0,
new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3)
.encoderLayerSizes(5).decoderLayerSizes(6)
.encoderLayerSizes(4).decoderLayerSizes(3)
.pzxActivationFunction(Activation.TANH)
.reconstructionDistribution(
reconstructionDistributions[i])
@ -304,17 +299,15 @@ public class VaeGradientCheckTests extends BaseDL4JTest {
int minibatch = 2;
Nd4j.getRandom().setSeed(12345);
for (int numSamples : new int[]{1, 3}) {
// for (int numSamples : new int[]{10}) {
INDArray features = Nd4j.rand(minibatch, 4);
for (int numSamples : new int[]{1, 2}) {
INDArray features = Nd4j.rand(DataType.DOUBLE, minibatch, 4);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.seed(12345L).weightInit(WeightInit.XAVIER).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(5, 6)
.decoderLayerSizes(7, 8).pzxActivationFunction(Activation.TANH)
.layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(2, 3)
.decoderLayerSizes(4, 3).pzxActivationFunction(Activation.TANH)
.reconstructionDistribution(
new GaussianReconstructionDistribution(Activation.TANH))
.numSamples(numSamples).activation(Activation.TANH)

View File

@ -72,9 +72,6 @@ public class YoloGradientCheckTests extends BaseDL4JTest {
@Test
public void testYoloOutputLayer() {
int depthIn = 2;
int[] minibatchSizes = {1, 3};
int[] widths = new int[]{4, 7};
int[] heights = new int[]{4, 5};
int c = 3;
int b = 3;
@ -83,52 +80,51 @@ public class YoloGradientCheckTests extends BaseDL4JTest {
Nd4j.getRandom().setSeed(1234567);
int[] minibatchSizes = {1, 3};
int[] widths = new int[]{4, 7};
int[] heights = new int[]{4, 5};
double[] l1 = new double[]{0.0, 0.3};
double[] l2 = new double[]{0.0, 0.4};
for( int wh = 0; wh<widths.length; wh++ ) {
for( int i = 0; i<widths.length; i++ ) {
int w = widths[wh];
int h = heights[wh];
int w = widths[i];
int h = heights[i];
int mb = minibatchSizes[i];
Nd4j.getRandom().setSeed(12345);
INDArray bbPrior = Nd4j.rand(b, 2).muliRowVector(Nd4j.create(new double[]{w, h})).addi(0.1);
for (int mb : minibatchSizes) {
for (int i = 0; i < l1.length; i++) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getRandom().setSeed(12345);
INDArray input = Nd4j.rand(new int[]{mb, depthIn, h, w});
INDArray labels = yoloLabels(mb, c, h, w);
INDArray input = Nd4j.rand(new int[]{mb, depthIn, h, w});
INDArray labels = yoloLabels(mb, c, h, w);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(a)
.l1(l1[i]).l2(l2[i])
.convolutionMode(ConvolutionMode.Same)
.list()
.layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1)
.nIn(depthIn).nOut(yoloDepth).build())//output: (5-2+0)/1+1 = 4
.layer(new Yolo2OutputLayer.Builder()
.boundingBoxPriors(bbPrior)
.build())
.build();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(a)
.l1(l1[i]).l2(l2[i])
.convolutionMode(ConvolutionMode.Same)
.list()
.layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1)
.nIn(depthIn).nOut(yoloDepth).build())//output: (5-2+0)/1+1 = 4
.layer(new Yolo2OutputLayer.Builder()
.boundingBoxPriors(bbPrior)
.build())
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg = "testYoloOutputLayer() - minibatch = " + mb + ", w=" + w + ", h=" + h + ", l1=" + l1[i] + ", l2=" + l2[i];
System.out.println(msg);
String msg = "testYoloOutputLayer() - minibatch = " + mb + ", w=" + w + ", h=" + h + ", l1=" + l1[i] + ", l2=" + l2[i];
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 100);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
@ -233,7 +229,7 @@ public class YoloGradientCheckTests extends BaseDL4JTest {
INDArray l = ds.getLabels();
boolean ok = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, f, l);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, f, l, null, null, true, 64);
assertTrue(ok);
TestUtils.testModelSerialization(net);

View File

@ -446,7 +446,7 @@ public class DTypeTests extends BaseDL4JTest {
.layer(new ActivationLayer(Activation.LEAKYRELU))
.layer(secondLast)
.layer(ol)
.setInputType(InputType.convolutionalFlat(28, 28, 1))
.setInputType(InputType.convolutionalFlat(8, 8, 1))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@ -457,16 +457,16 @@ public class DTypeTests extends BaseDL4JTest {
assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType());
assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType());
INDArray in = Nd4j.rand(networkDtype, 2, 28 * 28);
INDArray in = Nd4j.rand(networkDtype, 2, 8 * 8);
INDArray label;
if (outputLayer < 3) {
label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
} else if (outputLayer == 3) {
//CNN loss
label = Nd4j.rand(networkDtype, 2, 3, 28, 28);
label = Nd4j.rand(networkDtype, 2, 3, 8, 8);
} else if (outputLayer == 4) {
//YOLO
label = Nd4j.ones(networkDtype, 2, 6, 28, 28);
label = Nd4j.ones(networkDtype, 2, 6, 8, 8);
} else {
throw new IllegalStateException();
}
@ -550,7 +550,7 @@ public class DTypeTests extends BaseDL4JTest {
.layer(new Upsampling3D.Builder().size(2).build())
.layer(secondLast)
.layer(ol)
.setInputType(InputType.convolutional3D(Convolution3D.DataFormat.NCDHW, 28, 28, 28, 1))
.setInputType(InputType.convolutional3D(Convolution3D.DataFormat.NCDHW, 8, 8, 8, 1))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@ -561,13 +561,13 @@ public class DTypeTests extends BaseDL4JTest {
assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType());
assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType());
INDArray in = Nd4j.rand(networkDtype, 2, 1, 28, 28, 28);
INDArray in = Nd4j.rand(networkDtype, 2, 1, 8, 8, 8);
INDArray label;
if (outputLayer == 0) {
label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
} else if (outputLayer == 1) {
//CNN3D loss
label = Nd4j.rand(networkDtype, 2, 3, 28, 28, 28);
label = Nd4j.rand(networkDtype, 2, 3, 8, 8, 8);
} else if (outputLayer == 2) {
label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
} else {
@ -787,15 +787,15 @@ public class DTypeTests extends BaseDL4JTest {
switch (outputLayer) {
case 0:
ol = new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build();
secondLast = new LSTM.Builder().nOut(5).activation(Activation.TANH).build();
secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build();
break;
case 1:
ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build();
secondLast = new LSTM.Builder().nOut(5).activation(Activation.TANH).build();
secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build();
break;
case 2:
ol = new OutputLayer.Builder().nOut(5).build();
secondLast = new LastTimeStep(new LSTM.Builder().nOut(5).activation(Activation.TANH).build());
secondLast = new LastTimeStep(new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build());
break;
default:
throw new RuntimeException();
@ -825,12 +825,12 @@ public class DTypeTests extends BaseDL4JTest {
assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType());
assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType());
INDArray in = Nd4j.rand(networkDtype, 2, 5, 4);
INDArray in = Nd4j.rand(networkDtype, 2, 5, 2);
INDArray label;
if (outputLayer == 2) {
label = TestUtils.randomOneHot(2, 5).castTo(networkDtype);
} else {
label = TestUtils.randomOneHotTimeSeries(2, 5, 4).castTo(networkDtype);
label = TestUtils.randomOneHotTimeSeries(2, 5, 2).castTo(networkDtype);
}
@ -845,7 +845,7 @@ public class DTypeTests extends BaseDL4JTest {
net.setLabels(label);
net.computeGradientAndScore();
net.fit(new DataSet(in, label, Nd4j.ones(networkDtype, 2, 4), outputLayer == 2 ? null : Nd4j.ones(networkDtype, 2, 4)));
net.fit(new DataSet(in, label, Nd4j.ones(networkDtype, 2, 2), outputLayer == 2 ? null : Nd4j.ones(networkDtype, 2, 2)));
logUsedClasses(net);
@ -1219,9 +1219,9 @@ public class DTypeTests extends BaseDL4JTest {
.addLayer("2", new LocallyConnected1D.Builder().kernelSize(2).nOut(4).build(), "1")
.addLayer("out", new RnnOutputLayer.Builder().nOut(10).build(), "2")
.setOutputs("out")
.setInputTypes(InputType.recurrent(5, 4));
in = new INDArray[]{Nd4j.rand(networkDtype, 2, 5, 4)};
label = TestUtils.randomOneHotTimeSeries(2, 10, 4);
.setInputTypes(InputType.recurrent(5, 2));
in = new INDArray[]{Nd4j.rand(networkDtype, 2, 5, 2)};
label = TestUtils.randomOneHotTimeSeries(2, 10, 2);
break;
case 1:
b.addInputs("in")
@ -1229,8 +1229,8 @@ public class DTypeTests extends BaseDL4JTest {
.addLayer("2", new LocallyConnected2D.Builder().kernelSize(2, 2).nOut(5).build(), "1")
.addLayer("out", new OutputLayer.Builder().nOut(10).build(), "2")
.setOutputs("out")
.setInputTypes(InputType.convolutional(28, 28, 1));
in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 28, 28)};
.setInputTypes(InputType.convolutional(8, 8, 1));
in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 8, 8)};
label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
break;
default:

View File

@ -31,6 +31,7 @@ import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.LossLayer;
import org.deeplearning4j.nn.conf.layers.PrimaryCapsules;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Ignore;
import org.junit.Test;
import org.nd4j.evaluation.classification.Evaluation;
import org.nd4j.linalg.activations.impl.ActivationSoftmax;
@ -38,6 +39,7 @@ import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood;
@Ignore("AB - ignored due to excessive runtime. Keep for manual debugging when required")
public class CapsNetMNISTTest extends BaseDL4JTest {
@Override

View File

@ -95,7 +95,7 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest {
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4)
.updater(new Nesterovs(0.9)).dropOut(0.5)
.list()
.layer(new LocallyConnected1D.Builder().kernelSize(8).nIn(3)
.layer(new LocallyConnected1D.Builder().kernelSize(4).nIn(3)
.stride(1).nOut(16).dropOut(0.5)
.convolutionMode(ConvolutionMode.Strict)
.setInputSize(28)
@ -104,19 +104,19 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest {
.build())
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer
.nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build())
.setInputType(InputType.recurrent(3, 28));
.setInputType(InputType.recurrent(3, 8));
MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork network = new MultiLayerNetwork(conf);
network.init();
INDArray input = Nd4j.ones(10, 3, 28);
INDArray input = Nd4j.ones(10, 3, 8);
INDArray output = network.output(input, false);;
for (int i = 0; i < 100; i++) { // TODO: this falls flat for 1000 iterations on my machine
output = network.output(input, false);
}
assertArrayEquals(new long[] {(28 - 8 + 1) * 10, 10}, output.shape());
assertArrayEquals(new long[] {(8 - 4 + 1) * 10, 10}, output.shape());
network.fit(input, output);
}
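The updated assertion above follows from the locally connected arithmetic: with Strict convolution mode, a sequence of length 8 and a kernel of 4 at stride 1 give (8 - 4)/1 + 1 = 5 output steps per example, and the output appears to be flattened across the 10-example minibatch before the output layer. A small sketch of the expected shape, using values taken from the configuration in this hunk:

    // The flattening behaviour is inferred from the assertion itself, not verified against
    // the preprocessor implementation.
    int sequenceLength = 8, kernel = 4, stride = 1, minibatch = 10, nOut = 10;
    int outSteps = (sequenceLength - kernel) / stride + 1;                   // Strict mode: 5
    long[] expectedShape = new long[]{(long) outSteps * minibatch, nOut};    // {50, 10}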
@ -159,8 +159,10 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest {
.addLayer("2", new LocallyConnected2D.Builder().kernelSize(2,2).nOut(5).build(), "1")
.addLayer("out", new OutputLayer.Builder().nOut(10).build(), "2")
.setOutputs("out")
.setInputTypes(InputType.convolutional(28, 28, 1));
in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 28, 28)};
// .setInputTypes(InputType.convolutional(28, 28, 1));
// in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 28, 28)};
.setInputTypes(InputType.convolutional(8, 8, 1));
in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 8, 8)};
label = TestUtils.randomOneHot(2, 10).castTo(networkDtype);
break;
default:

View File

@ -93,8 +93,6 @@ public class TestSameDiffConv extends BaseDL4JTest {
//Note: to avoid the exponential number of tests here, we'll randomly run every Nth test only.
//With n=1, m=3 this is 1 out of every 3 tests (on average)
Random r = new Random(12345);
int n = 1;
int m = 30; //1 ot of every 30... 3888 possible combinations here
for (int minibatch : new int[]{5, 1}) {
Activation[] afns = new Activation[]{
@ -117,11 +115,8 @@ public class TestSameDiffConv extends BaseDL4JTest {
for (int[] dilation : new int[][]{{1, 1}, {2, 2}, {1, 2}}) {
for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}) {
for (Activation a : afns) {
int i = r.nextInt(m);
if (i >= n) {
//Example: n=2, m=3... skip on i=2, run test on i=0, i=1
continue;
}
if(r.nextInt(80) != 0)
continue; //1 of 80 on average - of 3888 possible combinations here -> ~49 tests
String msg = "Test " + (count++) + " - minibatch=" + minibatch + ", nIn=" + nIn
+ ", nOut=" + nOut + ", kernel=" + Arrays.toString(kernel) + ", stride="
@ -306,7 +301,7 @@ public class TestSameDiffConv extends BaseDL4JTest {
log.info("Starting: " + msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, f, l);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, f, l, null, null, true, 50); //Most of weights are in output layer
assertTrue(msg, gradOK);
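The skip logic above replaces the old n/m counters with a single seeded draw, r.nextInt(80) != 0, so each of the ~3888 kernel/stride/dilation/mode/activation combinations runs with probability 1/80 (about 49 cases per run, and the same cases every run because the Random is seeded). A minimal standalone sketch of that counting argument, using only java.util.Random:

import java.util.Random;

public class RandomSubsetCountSketch {
    public static void main(String[] args) {
        Random r = new Random(12345);          // same fixed seed as the test
        int total = 3888;                      // possible parameter combinations
        int kept = 0;
        for (int i = 0; i < total; i++) {
            if (r.nextInt(80) != 0)
                continue;                      // skip ~79 out of every 80 combinations
            kept++;                            // this combination would run a gradient check
        }
        System.out.println(kept + " of " + total + " combinations kept (expected about " + (total / 80.0) + ")");
    }
}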

View File

@ -100,7 +100,7 @@ public class TestOptimizers extends BaseDL4JTest {
ds.normalizeZeroMeanZeroUnitVariance();
for (OptimizationAlgorithm oa : toTest) {
int nIter = 10;
int nIter = 5;
MultiLayerNetwork network = new MultiLayerNetwork(getMLPConfigIris(oa));
network.init();
double score = network.score(ds);
@ -109,7 +109,7 @@ public class TestOptimizers extends BaseDL4JTest {
if (PRINT_OPT_RESULTS)
System.out.println("testOptimizersMLP() - " + oa);
int nCallsToOptimizer = 30;
int nCallsToOptimizer = 10;
double[] scores = new double[nCallsToOptimizer + 1];
scores[0] = score;
for (int i = 0; i < nCallsToOptimizer; i++) {
@ -256,34 +256,6 @@ public class TestOptimizers extends BaseDL4JTest {
}
}
@Test
public void testSphereFnOptStochGradDescentMultipleSteps() {
//Earlier tests: only do a single line search, though each line search will do multiple iterations
// of the line search algorithm.
//Here, do multiple optimization runs + multiple line search iterations within each run
//i.e., gradient is re-calculated at each step/run
//Single step tests earlier won't test storing of state between iterations
testSphereFnMultipleStepsHelper(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT, 100, 5);
}
@Test
public void testSphereFnOptLineGradDescentMultipleSteps() {
testSphereFnMultipleStepsHelper(OptimizationAlgorithm.LINE_GRADIENT_DESCENT, 100, 5);
}
@Test
public void testSphereFnOptCGMultipleSteps() {
testSphereFnMultipleStepsHelper(OptimizationAlgorithm.CONJUGATE_GRADIENT, 100, 5);
}
@Test
public void testSphereFnOptLBFGSMultipleSteps() {
testSphereFnMultipleStepsHelper(OptimizationAlgorithm.LBFGS, 100, 5);
}
private static void testSphereFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
int maxNumLineSearchIter) {
double[] scores = new double[nOptIter + 1];

View File

@ -58,8 +58,8 @@ public class ValidateCuDNN extends BaseDL4JTest {
int numClasses = 10;
//imageHeight,imageWidth,channels
int imageHeight = 240;
int imageWidth = 240;
int imageHeight = 64;
int imageWidth = 64;
int channels = 3;
IActivation activation = new ActivationIdentity();
MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder()
@ -68,9 +68,9 @@ public class ValidateCuDNN extends BaseDL4JTest {
.activation(new ActivationELU())
.updater(new Nesterovs(1e-3, 0.9))
.list(
new Convolution2D.Builder().nOut(96)
.kernelSize(11, 11).biasInit(0.0)
.stride(4, 4).build(),
new Convolution2D.Builder().nOut(16)
.kernelSize(4, 4).biasInit(0.0)
.stride(2, 2).build(),
new ActivationLayer.Builder().activation(activation).build(),
new Pooling2D.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX)
@ -85,12 +85,12 @@ public class ValidateCuDNN extends BaseDL4JTest {
.poolingType(SubsamplingLayer.PoolingType.MAX)
.kernelSize(3, 3).stride(2, 2)
.build(),
new Convolution2D.Builder().nOut(384)
new Convolution2D.Builder().nOut(16)
.kernelSize(3, 3).padding(1, 1)
.biasInit(0.0)
.stride(1, 1).build(),
new ActivationLayer.Builder().activation(activation).build(),
new Convolution2D.Builder().nOut(256)
new Convolution2D.Builder().nOut(16)
.kernelSize(3, 3).padding(1, 1)
.stride(1, 1).build(),
new ActivationLayer.Builder().activation(activation).build(),
@ -99,7 +99,7 @@ public class ValidateCuDNN extends BaseDL4JTest {
.kernelSize(3, 3).stride(2, 2)
.build(),
new DenseLayer.Builder()
.nOut(4096)
.nOut(64)
.biasInit(0.0)
.build(),
new ActivationLayer.Builder().activation(activation).build(),
@ -114,8 +114,8 @@ public class ValidateCuDNN extends BaseDL4JTest {
MultiLayerNetwork net = new MultiLayerNetwork(multiLayerConfiguration);
net.init();
int[] fShape = new int[]{32, channels, imageHeight, imageWidth};
int[] lShape = new int[]{32, numClasses};
int[] fShape = new int[]{8, channels, imageHeight, imageWidth};
int[] lShape = new int[]{8, numClasses};
List<Class<?>> classesToTest = new ArrayList<>();
classesToTest.add(ConvolutionLayer.class);

View File

@ -144,12 +144,6 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
Activation[] activFns = {Activation.SIGMOID, Activation.TANH};
boolean[] characteristic = {false, true}; //If true: run some backprop steps first
LossFunctions.LossFunction[] lossFunctions =
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here
DataSet ds = new IrisDataSetIterator(150, 150).next();
ds.normalizeZeroMeanZeroUnitVariance();
@ -161,73 +155,74 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
double[] l1vals = {0.0, 0.0, 0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS};
boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first
for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunctions.LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
LossFunctions.LossFunction[] lossFunctions =
{LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.l2(l2).l1(l1).l2Bias(biasL2[k]).l1Bias(biasL1[k])
.optimizationAlgo(
OptimizationAlgorithm.CONJUGATE_GRADIENT)
.seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
.cudnnAllowFallback(false)
.weightInit(WeightInit.XAVIER).activation(afn)
.updater(new NoOp()).build())
.layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
.weightInit(WeightInit.XAVIER).updater(new NoOp()).build())
for( int i=0; i<l2vals.length; i++ ){
Activation afn = activFns[i];
boolean doLearningFirst = characteristic[i];
LossFunctions.LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
double l2 = l2vals[i];
double l1 = l1vals[i];
.setInputType(InputType.convolutionalFlat(1, 4, 1));
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i])
.optimizationAlgo(
OptimizationAlgorithm.CONJUGATE_GRADIENT)
.seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6)
.weightInit(WeightInit.XAVIER).activation(afn)
.updater(new NoOp()).build())
.layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
.weightInit(WeightInit.XAVIER).updater(new NoOp()).build())
MultiLayerConfiguration conf = builder.build();
.setInputType(InputType.convolutionalFlat(1, 4, 1));
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
String testName = new Object() {
}.getClass().getEnclosingMethod().getName();
MultiLayerConfiguration conf = builder.build();
if (doLearningFirst) {
//Run a number of iterations of learning
mln.setInput(ds.getFeatures());
mln.setLabels(ds.getLabels());
mln.computeGradientAndScore();
double scoreBefore = mln.score();
for (int j = 0; j < 10; j++)
mln.fit(ds);
mln.computeGradientAndScore();
double scoreAfter = mln.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = testName
+ "- score did not (sufficiently) decrease during learning - activationFn="
+ afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
+ ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
+ ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
}
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
String testName = new Object() {
}.getClass().getEnclosingMethod().getName();
if (PRINT_RESULTS) {
System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", doLearningFirst="
+ doLearningFirst);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
TestUtils.testModelSerialization(mln);
}
}
if (doLearningFirst) {
//Run a number of iterations of learning
mln.setInput(ds.getFeatures());
mln.setLabels(ds.getLabels());
mln.computeGradientAndScore();
double scoreBefore = mln.score();
for (int j = 0; j < 10; j++)
mln.fit(ds);
mln.computeGradientAndScore();
double scoreAfter = mln.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = testName
+ "- score did not (sufficiently) decrease during learning - activationFn="
+ afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
+ ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
+ ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
}
if (PRINT_RESULTS) {
System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", doLearningFirst="
+ doLearningFirst);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
TestUtils.testModelSerialization(mln);
}
}
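The rewrite above is the pattern used throughout this commit: nested loops over every activation/loss/regularization combination are collapsed into parallel arrays, with index i selecting one hand-picked combination per iteration, so the case count drops from the full cross product to the array length. An illustrative standalone sketch of the pattern (the values here are examples, not the exact test cases):

public class ParallelCaseSketch {
    public static void main(String[] args) {
        // One "column" per test case; index i picks one value from each array.
        double[] l2vals      = {0.0, 0.0, 0.5, 0.0};
        double[] l1vals      = {0.0, 0.0, 0.5, 0.0};
        String[] activFns    = {"sigmoid", "tanh", "elu", "softplus"};
        boolean[] learnFirst = {false, true, false, true};
        for (int i = 0; i < l2vals.length; i++) {
            System.out.printf("case %d: act=%s, learnFirst=%b, l2=%.1f, l1=%.1f%n",
                    i, activFns[i], learnFirst[i], l2vals[i], l1vals[i]);
        }
    }
}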
@ -375,57 +370,43 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int[] padding = {0, 0};
int size = 2;
String[] activations = {"sigmoid", "tanh"};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
for (String afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
}
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.list().layer(new ConvolutionLayer.Builder(kernel,
stride, padding).nIn(inputDepth)
.nOut(3).build())//output: (5-2+0)/1+1 = 4
.layer(new Upsampling2D.Builder().size(size).build()) //output: 4*2 =8 -> 8x8x3
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(8 * 8 * 3)
.nOut(4).build())
.setInputType(InputType.convolutionalFlat(height, width,
inputDepth))
.build();
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.list().layer(new ConvolutionLayer.Builder(kernel,
stride, padding).nIn(inputDepth)
.cudnnAllowFallback(false)
.nOut(3).build())//output: (5-2+0)/1+1 = 4
.layer(new Upsampling2D.Builder().size(size).build()) //output: 4*2 =8 -> 8x8x3
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(8 * 8 * 3)
.nOut(4).build())
.setInputType(InputType.convolutionalFlat(height, width,
inputDepth))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg = "Upsampling - minibatch=" + minibatchSize;
String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
+ afn;
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
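TestUtils.randomOneHot(minibatchSize, nOut), used above in place of the removed putScalar loops, is assumed to build a [minibatchSize, nOut] matrix with a single 1.0 per row at a randomly chosen column. A minimal sketch of that behaviour (the helper name and seed here are illustrative, not the library implementation):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import java.util.Random;

public class RandomOneHotSketch {
    // Hypothetical re-implementation of the one-hot label helper, for illustration only
    public static INDArray randomOneHot(int rows, int nOut, long seed) {
        Random r = new Random(seed);
        INDArray out = Nd4j.zeros(rows, nOut);
        for (int i = 0; i < rows; i++) {
            out.putScalar(i, r.nextInt(nOut), 1.0);   // exactly one active class per row
        }
        return out;
    }

    public static void main(String[] args) {
        System.out.println(randomOneHot(3, 4, 12345L));
    }
}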
@ -646,63 +627,56 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
public void testCnnSamePaddingMode() {
int nOut = 2;
int[] minibatchSizes = {1, 3};
int[] minibatchSizes = {1, 3, 3, 2, 1, 2};
int[] heights = new int[]{4, 5, 6, 5, 4, 4}; //Same padding mode: insensitive to exact input size...
int[] kernelSizes = new int[]{2, 3, 2, 3, 2, 3};
int[] inputDepths = {1, 2, 4, 3, 2, 3};
int width = 5;
int[] heights = new int[]{4, 5, 6}; //Same padding mode: insensitive to exact input size...
int[] kernelSizes = new int[]{2, 3};
int[] inputDepths = {1, 2, 4};
Nd4j.getRandom().setSeed(12345);
for (int inputDepth : inputDepths) {
for (int minibatchSize : minibatchSizes) {
for (int height : heights) {
for (int k : kernelSizes) {
for( int i=0; i<minibatchSizes.length; i++ ){
int inputDepth = inputDepths[i];
int minibatchSize = minibatchSizes[i];
int height = heights[i];
int k = kernelSizes[i];
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
}
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH).convolutionMode(Same).list()
.layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k)
.cudnnAllowFallback(false)
.stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build())
.layer(1, new SubsamplingLayer.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k)
.cudnnAllowFallback(false)
.stride(1, 1).padding(0, 0).build())
.layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k)
.cudnnAllowFallback(false)
.stride(1, 1).padding(0, 0).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH).convolutionMode(Same).list()
.layer(0, new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k)
.stride(1, 1).padding(0, 0).nIn(inputDepth).nOut(2).build())
.layer(1, new SubsamplingLayer.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k)
.stride(1, 1).padding(0, 0).build())
.layer(2, new ConvolutionLayer.Builder().nIn(2).nOut(2).kernelSize(k, k)
.stride(1, 1).padding(0, 0).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
for (int i = 0; i < net.getLayers().length; i++) {
System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams());
}
String msg = "Minibatch=" + minibatchSize + ", inDepth=" + inputDepth + ", height=" + height
+ ", width=" + width + ", kernelSize=" + k;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
for (int j = 0; j < net.getLayers().length; j++) {
System.out.println("nParams, layer " + j + ": " + net.getLayer(j).numParams());
}
String msg = "Minibatch=" + minibatchSize + ", inDepth=" + inputDepth + ", height=" + height
+ ", kernelSize=" + k;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
@ -732,12 +706,10 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
}
Layer convLayer = new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k)
.cudnnAllowFallback(false)
.stride(stride, stride).padding(0, 0).nIn(inputDepth).nOut(2).build();
Layer poolLayer = new SubsamplingLayer.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k)
.cudnnAllowFallback(false)
.stride(stride, stride).padding(0, 0).build();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
@ -765,7 +737,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
labels);
labels, null, null, true, 128);
assertTrue(msg, gradOK);
@ -783,69 +755,66 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
Nd4j.getRandom().setSeed(12345);
int nOut = 4;
int[] minibatchSizes = {1, 3};
int width = 6;
int height = 6;
int[] inputDepths = {1, 3};
int[] kernel = {2, 2};
int[] stride = {1, 1};
int[] padding = {0, 0};
int[] minibatchSizes = {1, 3, 2};
int[] inputDepths = {1, 3, 2};
int[][] zeroPadLayer = new int[][]{{0, 0, 0, 0}, {1, 1, 0, 0}, {2, 2, 2, 2}};
for (int inputDepth : inputDepths) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(new int[]{minibatchSize, inputDepth, height, width});
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
}
for (int[] zeroPad : zeroPadLayer) {
for( int i=0; i<minibatchSizes.length; i++ ){
int minibatchSize = minibatchSizes[i];
int inputDepth = inputDepths[i];
int[] zeroPad = zeroPadLayer[i];
INDArray input = Nd4j.rand(DataType.DOUBLE, new int[]{minibatchSize, inputDepth, height, width});
INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder().updater(new NoOp())
.dataType(DataType.DOUBLE)
.dist(new NormalDistribution(0, 1)).list()
.layer(0, new ConvolutionLayer.Builder(kernel, stride, padding)
.cudnnAllowFallback(false)
.nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5
.layer(1, new ZeroPaddingLayer.Builder(zeroPad).build()).layer(2,
new ConvolutionLayer.Builder(kernel, stride,
padding).nIn(3).nOut(3).cudnnAllowFallback(false).build())//output: (6-2+0)/1+1 = 5
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(4).build())
.setInputType(InputType.convolutional(height, width, inputDepth))
.build();
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder().updater(new NoOp())
.dataType(DataType.DOUBLE)
.dist(new NormalDistribution(0, 1)).list()
.layer(0, new ConvolutionLayer.Builder(kernel, stride, padding)
.nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5
.layer(1, new ZeroPaddingLayer.Builder(zeroPad).build()).layer(2,
new ConvolutionLayer.Builder(kernel, stride,
padding).nIn(3).nOut(3).build())//output: (6-2+0)/1+1 = 5
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(4).build())
.setInputType(InputType.convolutional(height, width, inputDepth))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
//Check zero padding activation shape
org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer zpl =
(org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer) net.getLayer(1);
val expShape = new long[]{minibatchSize, inputDepth, height + zeroPad[0] + zeroPad[1],
width + zeroPad[2] + zeroPad[3]};
INDArray out = zpl.activate(input, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(expShape, out.shape());
//Check zero padding activation shape
org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer zpl =
(org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer) net.getLayer(1);
val expShape = new long[]{minibatchSize, inputDepth, height + zeroPad[0] + zeroPad[1],
width + zeroPad[2] + zeroPad[3]};
INDArray out = zpl.activate(input, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(expShape, out.shape());
String msg = "minibatch=" + minibatchSize + ", channels=" + inputDepth + ", zeroPad = "
+ Arrays.toString(zeroPad);
String msg = "minibatch=" + minibatchSize + ", channels=" + inputDepth + ", zeroPad = "
+ Arrays.toString(zeroPad);
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
}
}
@ -853,12 +822,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
public void testDeconvolution2D() {
int nOut = 2;
int[] minibatchSizes = new int[]{1, 4, 1, 4, 1, 1, 2, 1};
int[] kernelSizes = new int[]{1, 1, 3, 3, 1, 1, 3, 3};
int[] strides = {1, 1, 1, 1, 2, 2, 2, 2};
int[] dilation = {1, 2, 2, 1, 1, 1, 2, 2};
Activation[] activations = new Activation[]{Activation.SIGMOID, Activation.TANH, Activation.TANH, Activation.TANH, Activation.TANH, Activation.SIGMOID, Activation.SIGMOID, Activation.SIGMOID};
ConvolutionMode[] cModes = new ConvolutionMode[]{Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate};
int[] minibatchSizes = new int[]{1, 3, 3, 1, 3};
int[] kernelSizes = new int[]{1, 1, 1, 3, 3};
int[] strides = {1, 1, 2, 2, 2};
int[] dilation = {1, 2, 1, 2, 2};
Activation[] activations = new Activation[]{Activation.SIGMOID, Activation.TANH, Activation.SIGMOID, Activation.SIGMOID, Activation.SIGMOID};
ConvolutionMode[] cModes = new ConvolutionMode[]{Same, Same, Truncate, Truncate, Truncate};
int width = 7;
int height = 7;
int inputDepth = 3;
@ -888,23 +857,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.updater(new NoOp())
.activation(act)
.list()
.layer(new Deconvolution2D.Builder().name("deconvolution_2D_layer-0")
.cudnnAllowFallback(false)
.kernelSize(1, 1)
.stride(1, 1)
.dilation(0, 0)
.convolutionMode(cm)
.nIn(inputDepth)
.nOut(inputDepth)
.build())
.layer(new Deconvolution2D.Builder().name("deconvolution_2D_layer")
.cudnnAllowFallback(false)
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.convolutionMode(cm)
.nIn(inputDepth).nOut(nOut)
.build());
.nIn(inputDepth).nOut(nOut).build());
MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
@ -922,7 +880,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 100);
assertTrue(msg, gradOK);
@ -936,16 +894,16 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int depthMultiplier = 2;
int nOut = nIn * depthMultiplier;
int width = 8;
int height = 8;
int width = 5;
int height = 5;
Nd4j.getRandom().setSeed(12345);
int[] ks = new int[]{1,3,1,3,1,3,1,3};
int[] ss = new int[]{1,1,2,2,1,1,2,2};
int[] ks = new int[]{1,3,3,1,3};
int[] ss = new int[]{1,1,1,2,2};
ConvolutionMode[] cms = new ConvolutionMode[]{
Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1,1,3,3,3,1,3,3};
Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1,1,1,3,3};
for( int t=0; t<ks.length; t++ ){
@ -987,11 +945,11 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
}
String msg = " - mb=" + minibatchSize + ", k="
+ k + ", s=" + s + ", cm=" + cm;
+ k + ", nIn=" + nIn + ", depthMul=" + depthMultiplier + ", s=" + s + ", cm=" + cm;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 256);
assertTrue(msg, gradOK);
@ -1004,20 +962,20 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
public void testSeparableConv2D() {
int nOut = 2;
int width = 8;
int height = 8;
int[] minibatchSizes = new int[]{1, 3};
int width = 6;
int height = 6;
int inputDepth = 3;
Nd4j.getRandom().setSeed(12345);
int[] ks = new int[]{1,3,1,3,1,3,1,3};
int[] ss = new int[]{1,1,2,2,1,1,2,2};
int[] ds = new int[]{1,1,1,1,2,2,2,2};
ConvolutionMode[] cms = new ConvolutionMode[]{
Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1,1,3,3,3,1,3,3};
int[] ks = new int[]{1, 3, 3, 1, 3};
int[] ss = new int[]{1, 1, 1, 2, 2};
int[] ds = new int[]{1, 1, 2, 2, 2};
ConvolutionMode[] cms = new ConvolutionMode[]{Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1, 1, 1, 3, 3};
for( int t=0; t<ks.length; t++ ){
for (int t = 0; t < ks.length; t++) {
int k = ks[t];
int s = ss[t];
@ -1041,10 +999,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.activation(Activation.TANH)
.convolutionMode(cm)
.list()
.layer(new Convolution2D.Builder().kernelSize(1, 1).stride(1, 1)
.nIn(inputDepth).nOut(inputDepth).build())
.layer(new SeparableConvolution2D.Builder().name("Separable conv 2D layer")
.cudnnAllowFallback(false)
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
@ -1067,7 +1022,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 50); //Most params are in output layer
assertTrue(msg, gradOK);
@ -1079,21 +1034,21 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
public void testCnnDilated() {
int nOut = 2;
int minibatchSize = 3;
int minibatchSize = 2;
int width = 8;
int height = 8;
int inputDepth = 3;
int inputDepth = 2;
Nd4j.getRandom().setSeed(12345);
boolean[] sub = new boolean[]{true,false,true,false,true,false,true,false};
int[] stride = new int[]{1,1,2,2,1,1,2,2};
int[] kernel = new int[]{2,2,2,2,3,3,3,3};
int[] ds = new int[]{2,3,3,2,2,3,3,2};
ConvolutionMode[] cms = new ConvolutionMode[]{Same, Same, Same, Truncate, Truncate, Truncate, Same, Truncate};
boolean[] sub = new boolean[]{true, true, false, true, false};
int[] stride = new int[]{1, 1, 1, 2, 2};
int[] kernel = new int[]{2, 3, 3, 3, 3};
int[] ds = new int[]{2, 2, 3, 3, 2};
ConvolutionMode[] cms = new ConvolutionMode[]{Same, Truncate, Truncate, Same, Truncate};
for(int t=0; t<sub.length; t++ ){
for (int t = 0; t < sub.length; t++) {
boolean subsampling = sub[t];
int s = stride[t];
@ -1119,14 +1074,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.cudnnAllowFallback(false)
.nIn(inputDepth).nOut(2).build());
if (subsampling) {
b.layer(new SubsamplingLayer.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX)
.kernelSize(k, k)
.stride(s, s)
.cudnnAllowFallback(false)
.dilation(d, d)
.build());
} else {
@ -1134,7 +1087,6 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.cudnnAllowFallback(false)
.build());
}
@ -1166,7 +1118,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
@Test
public void testCropping2DLayer() {
Nd4j.getRandom().setSeed(12345);
int nOut = 4;
int nOut = 2;
int[] minibatchSizes = {1, 3};
int width = 12;
@ -1177,7 +1129,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int[] stride = {1, 1};
int[] padding = {0, 0};
int[][] cropTestCases = new int[][]{{0, 0, 0, 0}, {1, 1, 0, 0}, {2, 2, 2, 2}, {1,2,3,4}};
int[][] cropTestCases = new int[][]{{0, 0, 0, 0}, {1, 1, 0, 0}, {2, 2, 2, 2}, {1, 2, 3, 4}};
for (int inputDepth : inputDepths) {
for (int minibatchSize : minibatchSizes) {
@ -1195,12 +1147,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.convolutionMode(ConvolutionMode.Same)
.weightInit(new NormalDistribution(0, 1)).list()
.layer(new ConvolutionLayer.Builder(kernel, stride, padding)
.cudnnAllowFallback(false)
.nIn(inputDepth).nOut(3).build())//output: (6-2+0)/1+1 = 5
.nIn(inputDepth).nOut(2).build())//output: (6-2+0)/1+1 = 5
.layer(new Cropping2D(crop))
.layer(new ConvolutionLayer.Builder(kernel, stride,padding).nIn(3).nOut(3).cudnnAllowFallback(false).build())
.layer(new ConvolutionLayer.Builder(kernel, stride, padding).nIn(2).nOut(2).build())
.layer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG).kernelSize(3, 3).stride(3, 3).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(4).build())
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(height, width, inputDepth))
.build();
@ -1225,7 +1177,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 160);
assertTrue(msg, gradOK);
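Many of the gradient checks above now call the extended checkGradients overload whose trailing arguments (null input/label masks, then true and a count such as 50, 128 or 160) appear to restrict the check to a random subset of parameters; that subset is where most of the runtime saving comes from. A minimal standalone sketch of that call shape, assuming the same overload as used in these tests; the exact meaning of the last two arguments should be confirmed against GradientCheckUtil in this DL4J version:

import org.deeplearning4j.gradientcheck.GradientCheckUtil;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class SubsetGradientCheckSketch {
    public static void main(String[] args) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .dataType(DataType.DOUBLE)      // gradient checks need double precision
                .updater(new NoOp())            // no updater, so numeric and analytic gradients match
                .seed(12345)
                .list()
                .layer(new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(4).nOut(3).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray f = Nd4j.rand(DataType.DOUBLE, 3, 4);
        INDArray l = Nd4j.zeros(3, 3);
        for (int i = 0; i < 3; i++)
            l.putScalar(i, i % 3, 1.0);         // one-hot labels, as in the tests above

        // Trailing (true, 32) assumed to check only a random subset of up to 32 values per parameter array
        boolean ok = GradientCheckUtil.checkGradients(net, 1e-6, 1e-3, 1e-8,
                false, false, f, l, null, null, true, 32);
        System.out.println("Gradients OK: " + ok);
    }
}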

View File

@ -369,10 +369,10 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
public void testLSTM() throws Exception {
Nd4j.getRandom().setSeed(12345);
int minibatch = 10;
int inputSize = 8;
int lstmLayerSize = 7;
int timeSeriesLength = 6;
int minibatch = 4;
int inputSize = 3;
int lstmLayerSize = 4;
int timeSeriesLength = 3;
int nOut = 4;
INDArray input = Nd4j.rand(new int[] {minibatch, inputSize, timeSeriesLength});
INDArray labels = Nd4j.zeros(minibatch, nOut, timeSeriesLength);
@ -417,7 +417,7 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels, null, null, true, 32);
assertTrue(gradOK);
}
@ -489,10 +489,7 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
int width = 8;
int height = 8;
int inputDepth = 3;
int[] kernelSizes = new int[]{2, 3};
int[] strides = {1, 2};
int[] dilation = {2, 3};
ConvolutionMode[] cModes = new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same};
Nd4j.getRandom().setSeed(12345);
@ -502,85 +499,88 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
Field f2 = org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer.class.getDeclaredField("helper");
f2.setAccessible(true);
int[] kernelSizes = new int[]{2, 3, 2};
int[] strides = {1, 2, 2};
int[] dilation = {2, 3, 2};
ConvolutionMode[] cModes = new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same, ConvolutionMode.Truncate};
for (boolean subsampling : new boolean[]{false, true}) {
for (int k : kernelSizes) {
for (int s : strides) {
for (int d : dilation) {
for (ConvolutionMode cm : cModes) {
for (int t = 0; t < kernelSizes.length; t++) {
int k = kernelSizes[t];
int s = strides[t];
int d = dilation[t];
ConvolutionMode cm = cModes[t];
//Use larger input with larger dilation values (to avoid invalid config)
int w = d * width;
int h = d * height;
//Use larger input with larger dilation values (to avoid invalid config)
int w = d * width;
int h = d * height;
INDArray input = Nd4j.rand(minibatchSize, w * h * inputDepth);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
}
NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH).convolutionMode(cm).list()
.layer(new ConvolutionLayer.Builder().name("layer 0")
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.nIn(inputDepth).nOut(2).build());
if (subsampling) {
b.layer(new SubsamplingLayer.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX)
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.build());
} else {
b.layer(new ConvolutionLayer.Builder().nIn(2).nOut(2)
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.build());
}
MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(h, w, inputDepth)).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c0 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer)net.getLayer(0);
ConvolutionHelper ch0 = (ConvolutionHelper) f.get(c0);
assertTrue(ch0 instanceof CudnnConvolutionHelper);
if(subsampling){
org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer s1 =
(org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer)net.getLayer(1);
SubsamplingHelper sh1 = (SubsamplingHelper) f2.get(s1);
assertTrue(sh1 instanceof SubsamplingHelper);
} else {
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c1 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer)net.getLayer(1);
ConvolutionHelper ch1 = (ConvolutionHelper) f.get(c1);
assertTrue(ch1 instanceof CudnnConvolutionHelper);
}
for (int i = 0; i < net.getLayers().length; i++) {
System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams());
}
String msg = (subsampling ? "subsampling" : "conv") + " - mb=" + minibatchSize + ", k="
+ k + ", s=" + s + ", d=" + d + ", cm=" + cm;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
}
}
INDArray input = Nd4j.rand(minibatchSize, w * h * inputDepth);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
}
NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH).convolutionMode(cm).list()
.layer(new ConvolutionLayer.Builder().name("layer 0")
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.nIn(inputDepth).nOut(2).build());
if (subsampling) {
b.layer(new SubsamplingLayer.Builder()
.poolingType(SubsamplingLayer.PoolingType.MAX)
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.build());
} else {
b.layer(new ConvolutionLayer.Builder().nIn(2).nOut(2)
.kernelSize(k, k)
.stride(s, s)
.dilation(d, d)
.build());
}
MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(h, w, inputDepth)).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c0 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) net.getLayer(0);
ConvolutionHelper ch0 = (ConvolutionHelper) f.get(c0);
assertTrue(ch0 instanceof CudnnConvolutionHelper);
if (subsampling) {
org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer s1 =
(org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer) net.getLayer(1);
SubsamplingHelper sh1 = (SubsamplingHelper) f2.get(s1);
assertTrue(sh1 instanceof SubsamplingHelper);
} else {
org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c1 =
(org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) net.getLayer(1);
ConvolutionHelper ch1 = (ConvolutionHelper) f.get(c1);
assertTrue(ch1 instanceof CudnnConvolutionHelper);
}
for (int i = 0; i < net.getLayers().length; i++) {
System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams());
}
String msg = (subsampling ? "subsampling" : "conv") + " - mb=" + minibatchSize + ", k="
+ k + ", s=" + s + ", d=" + d + ", cm=" + cm;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
}
}
}
@ -588,7 +588,7 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
@Test
public void testDropout() {
int minibatch = 3;
int minibatch = 2;
for (boolean cnn : new boolean[]{false, true}) {
Nd4j.getRandom().setSeed(12345);
@ -605,15 +605,15 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
.list();
if (cnn) {
builder.layer(new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1).nOut(3).build());
builder.layer(new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1).nOut(3).build());
builder.setInputType(InputType.convolutional(8, 8, 3));
builder.layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(2, 2).nOut(2).build());
builder.layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(2, 2).nOut(2).build());
builder.setInputType(InputType.convolutional(8, 8, 2));
} else {
builder.layer(new DenseLayer.Builder().nOut(12).build());
builder.layer(new DenseLayer.Builder().nOut(12).build());
builder.setInputType(InputType.feedForward(8));
builder.layer(new DenseLayer.Builder().nOut(8).build());
builder.layer(new DenseLayer.Builder().nOut(8).build());
builder.setInputType(InputType.feedForward(6));
}
builder.layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build());
builder.layer(new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build());
MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
@ -621,11 +621,11 @@ public class CuDNNGradientChecks extends BaseDL4JTest {
INDArray f;
if (cnn) {
f = Nd4j.rand(new int[]{minibatch, 3, 8, 8}).muli(10).subi(5);
f = Nd4j.rand(new int[]{minibatch, 2, 8, 8}).muli(10).subi(5);
} else {
f = Nd4j.rand(minibatch, 8).muli(10).subi(5);
f = Nd4j.rand(minibatch, 6).muli(10).subi(5);
}
INDArray l = TestUtils.randomOneHot(minibatch, 10);
INDArray l = TestUtils.randomOneHot(minibatch, 3);
mln.output(f, true);

View File

@ -0,0 +1,140 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.graph;
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TestName;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.profiler.ProfilerConfig;
import java.lang.management.ManagementFactory;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@Slf4j
public class BaseDL4JTest {
@Rule
public TestName name = new TestName();
protected long startTime;
protected int threadCountBefore;
/**
* Override this to set the profiling mode for the tests defined in the child class
*/
public OpExecutioner.ProfilingMode getProfilingMode(){
return OpExecutioner.ProfilingMode.SCOPE_PANIC;
}
/**
* Override this to set the datatype of the tests defined in the child class
*/
public DataType getDataType(){
return DataType.DOUBLE;
}
public DataType getDefaultFPDataType(){
return getDataType();
}
@Before
public void beforeTest(){
log.info("{}.{}", getClass().getSimpleName(), name.getMethodName());
Nd4j.getExecutioner().setProfilingMode(getProfilingMode());
Nd4j.getExecutioner().setProfilingConfig(ProfilerConfig.builder().build());
Nd4j.setDefaultDataTypes(getDataType(), getDefaultFPDataType());
startTime = System.currentTimeMillis();
threadCountBefore = ManagementFactory.getThreadMXBean().getThreadCount();
}
@After
public void afterTest(){
//Attempt to keep workspaces isolated between tests
Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
MemoryWorkspace currWS = Nd4j.getMemoryManager().getCurrentWorkspace();
Nd4j.getMemoryManager().setCurrentWorkspace(null);
if(currWS != null){
//Not really safe to continue testing under this situation... other tests will likely fail with obscure
// errors that are hard to track back to this
log.error("Open workspace leaked from test! Exiting - {}, isOpen = {} - {}", currWS.getId(), currWS.isScopeActive(), currWS);
System.exit(1);
}
StringBuilder sb = new StringBuilder();
long maxPhys = Pointer.maxPhysicalBytes();
long maxBytes = Pointer.maxBytes();
long currPhys = Pointer.physicalBytes();
long currBytes = Pointer.totalBytes();
long jvmTotal = Runtime.getRuntime().totalMemory();
long jvmMax = Runtime.getRuntime().maxMemory();
int threadsAfter = ManagementFactory.getThreadMXBean().getThreadCount();
long duration = System.currentTimeMillis() - startTime;
sb.append(getClass().getSimpleName()).append(".").append(name.getMethodName())
.append(": ").append(duration).append(" ms")
.append(", threadCount: (").append(threadCountBefore).append("->").append(threadsAfter).append(")")
.append(", jvmTotal=").append(jvmTotal)
.append(", jvmMax=").append(jvmMax)
.append(", totalBytes=").append(currBytes).append(", maxBytes=").append(maxBytes)
.append(", currPhys=").append(currPhys).append(", maxPhys=").append(maxPhys);
List<MemoryWorkspace> ws = Nd4j.getWorkspaceManager().getAllWorkspacesForCurrentThread();
if(ws != null && ws.size() > 0){
long currSize = 0;
for(MemoryWorkspace w : ws){
currSize += w.getCurrentSize();
}
if(currSize > 0){
sb.append(", threadWSSize=").append(currSize)
.append(" (").append(ws.size()).append(" WSs)");
}
}
Properties p = Nd4j.getExecutioner().getEnvironmentInformation();
Object o = p.get("cuda.devicesInformation");
if(o instanceof List){
List<Map<String,Object>> l = (List<Map<String, Object>>) o;
if(l.size() > 0) {
sb.append(" [").append(l.size())
.append(" GPUs: ");
for (int i = 0; i < l.size(); i++) {
Map<String,Object> m = l.get(i);
if(i > 0)
sb.append(",");
sb.append("(").append(m.get("cuda.freeMemory")).append(" free, ")
.append(m.get("cuda.totalMemory")).append(" total)");
}
sb.append("]");
}
}
log.info(sb.toString());
}
}
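The new base class above is what the remaining files in this commit switch their test classes to extend. A minimal sketch of a test that opts into it, with an optional data-type override (the class and method names here are illustrative):

package org.deeplearning4j.graph;

import org.junit.Test;
import org.nd4j.linalg.api.buffer.DataType;
import static org.junit.Assert.assertEquals;

public class ExampleGraphTest extends BaseDL4JTest {

    @Override
    public DataType getDataType() {
        return DataType.FLOAT;   // run this class in single precision instead of the default DOUBLE
    }

    @Test(timeout = 10000L)
    public void testSomething() {
        // beforeTest() has already set the data types and profiling mode;
        // afterTest() will check for leaked workspaces and log runtime/memory for this method.
        assertEquals(4, 2 + 2);  // placeholder body; real tests build graphs/networks here
    }
}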

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.graph.data;
import org.apache.commons.lang3.ArrayUtils;
import org.deeplearning4j.graph.BaseDL4JTest;
import org.deeplearning4j.graph.api.Edge;
import org.deeplearning4j.graph.api.IGraph;
import org.deeplearning4j.graph.data.impl.DelimitedEdgeLineProcessor;
@ -32,7 +33,7 @@ import java.util.List;
import static org.junit.Assert.*;
public class TestGraphLoading {
public class TestGraphLoading extends BaseDL4JTest {
@Test(timeout = 10000L)
public void testEdgeListGraphLoading() throws IOException {

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.graph.data;
import org.apache.commons.lang3.ArrayUtils;
import org.deeplearning4j.graph.BaseDL4JTest;
import org.deeplearning4j.graph.api.Edge;
import org.deeplearning4j.graph.api.IGraph;
import org.deeplearning4j.graph.data.impl.WeightedEdgeLineProcessor;
@ -32,7 +33,7 @@ import java.util.List;
import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.assertEquals;
public class TestGraphLoadingWeighted {
public class TestGraphLoadingWeighted extends BaseDL4JTest {
@Test(timeout = 10000L)
public void testWeightedDirected() throws IOException {

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.graph.graph;
import org.apache.commons.lang3.ArrayUtils;
import org.deeplearning4j.graph.BaseDL4JTest;
import org.deeplearning4j.graph.api.*;
import org.deeplearning4j.graph.data.GraphLoader;
import org.deeplearning4j.graph.iterator.RandomWalkIterator;
@ -34,7 +35,7 @@ import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.*;
public class TestGraph {
public class TestGraph extends BaseDL4JTest {
@Test(timeout = 10000L)
public void testSimpleGraph() {

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.graph.models.deepwalk;
import org.deeplearning4j.graph.BaseDL4JTest;
import org.deeplearning4j.graph.data.GraphLoader;
import org.deeplearning4j.graph.graph.Graph;
import org.deeplearning4j.graph.iterator.GraphWalkIterator;
@ -35,7 +36,7 @@ import java.util.Arrays;
import static org.junit.Assert.*;
public class DeepWalkGradientCheck {
public class DeepWalkGradientCheck extends BaseDL4JTest {
public static final double epsilon = 1e-8;
public static final double MAX_REL_ERROR = 1e-3;

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.graph.models.deepwalk;
import org.apache.commons.io.FilenameUtils;
import org.deeplearning4j.graph.BaseDL4JTest;
import org.deeplearning4j.graph.api.Edge;
import org.deeplearning4j.graph.api.IGraph;
import org.deeplearning4j.graph.data.GraphLoader;
@ -42,7 +43,7 @@ import java.util.Random;
import static org.junit.Assert.*;
public class TestDeepWalk {
public class TestDeepWalk extends BaseDL4JTest {
@Rule
public TemporaryFolder testDir = new TemporaryFolder();
@ -214,7 +215,7 @@ public class TestDeepWalk {
Nd4j.getRandom().setSeed(12345);
int nEpochs = 50;
int nEpochs = 5;
//Set up network
DeepWalk<String, String> deepWalk =

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.graph.models.deepwalk;
import org.deeplearning4j.graph.BaseDL4JTest;
import org.junit.Test;
import java.util.Arrays;
@ -24,7 +25,7 @@ import java.util.Set;
import static org.junit.Assert.*;
public class TestGraphHuffman {
public class TestGraphHuffman extends BaseDL4JTest {
@Test(timeout = 10000L)
public void testGraphHuffman() {

View File

@ -0,0 +1,140 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.nn.modelimport.keras;
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TestName;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.profiler.ProfilerConfig;
import java.lang.management.ManagementFactory;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@Slf4j
public class BaseDL4JTest {
@Rule
public TestName name = new TestName();
protected long startTime;
protected int threadCountBefore;
/**
* Override this to set the profiling mode for the tests defined in the child class
*/
public OpExecutioner.ProfilingMode getProfilingMode(){
return OpExecutioner.ProfilingMode.SCOPE_PANIC;
}
/**
* Override this to set the datatype of the tests defined in the child class
*/
public DataType getDataType(){
return DataType.DOUBLE;
}
public DataType getDefaultFPDataType(){
return getDataType();
}
@Before
public void beforeTest(){
log.info("{}.{}", getClass().getSimpleName(), name.getMethodName());
Nd4j.getExecutioner().setProfilingMode(getProfilingMode());
Nd4j.getExecutioner().setProfilingConfig(ProfilerConfig.builder().build());
Nd4j.setDefaultDataTypes(getDataType(), getDefaultFPDataType());
startTime = System.currentTimeMillis();
threadCountBefore = ManagementFactory.getThreadMXBean().getThreadCount();
}
@After
public void afterTest(){
//Attempt to keep workspaces isolated between tests
Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
MemoryWorkspace currWS = Nd4j.getMemoryManager().getCurrentWorkspace();
Nd4j.getMemoryManager().setCurrentWorkspace(null);
if(currWS != null){
//Not really safe to continue testing under this situation... other tests will likely fail with obscure
// errors that are hard to track back to this
log.error("Open workspace leaked from test! Exiting - {}, isOpen = {} - {}", currWS.getId(), currWS.isScopeActive(), currWS);
System.exit(1);
}
StringBuilder sb = new StringBuilder();
long maxPhys = Pointer.maxPhysicalBytes();
long maxBytes = Pointer.maxBytes();
long currPhys = Pointer.physicalBytes();
long currBytes = Pointer.totalBytes();
long jvmTotal = Runtime.getRuntime().totalMemory();
long jvmMax = Runtime.getRuntime().maxMemory();
int threadsAfter = ManagementFactory.getThreadMXBean().getThreadCount();
long duration = System.currentTimeMillis() - startTime;
sb.append(getClass().getSimpleName()).append(".").append(name.getMethodName())
.append(": ").append(duration).append(" ms")
.append(", threadCount: (").append(threadCountBefore).append("->").append(threadsAfter).append(")")
.append(", jvmTotal=").append(jvmTotal)
.append(", jvmMax=").append(jvmMax)
.append(", totalBytes=").append(currBytes).append(", maxBytes=").append(maxBytes)
.append(", currPhys=").append(currPhys).append(", maxPhys=").append(maxPhys);
List<MemoryWorkspace> ws = Nd4j.getWorkspaceManager().getAllWorkspacesForCurrentThread();
if(ws != null && ws.size() > 0){
long currSize = 0;
for(MemoryWorkspace w : ws){
currSize += w.getCurrentSize();
}
if(currSize > 0){
sb.append(", threadWSSize=").append(currSize)
.append(" (").append(ws.size()).append(" WSs)");
}
}
Properties p = Nd4j.getExecutioner().getEnvironmentInformation();
Object o = p.get("cuda.devicesInformation");
if(o instanceof List){
List<Map<String,Object>> l = (List<Map<String, Object>>) o;
if(l.size() > 0) {
sb.append(" [").append(l.size())
.append(" GPUs: ");
for (int i = 0; i < l.size(); i++) {
Map<String,Object> m = l.get(i);
if(i > 0)
sb.append(",");
sb.append("(").append(m.get("cuda.freeMemory")).append(" free, ")
.append(m.get("cuda.totalMemory")).append(" total)");
}
sb.append("]");
}
}
log.info(sb.toString());
}
}

View File

@ -38,7 +38,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import static org.junit.Assert.*;
public class MiscTests {
public class MiscTests extends BaseDL4JTest {
@Rule
public TemporaryFolder testDir = new TemporaryFolder();

View File

@ -24,6 +24,7 @@ import org.deeplearning4j.datasets.datavec.SequenceRecordReaderDataSetIterator;
import org.deeplearning4j.nn.layers.recurrent.LSTM;
import org.deeplearning4j.nn.layers.recurrent.LastTimeStepLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
@ -54,7 +55,7 @@ import java.util.List;
import static junit.framework.TestCase.assertTrue;
@Ignore("AB - 2019/05/27 - NPE on CUDA only. Ignored to get all passing baseline on master; see issue 7657")
public class FullModelComparisons {
public class FullModelComparisons extends BaseDL4JTest {
ClassLoader classLoader = FullModelComparisons.class.getClassLoader();

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.configurations;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.KerasFlattenRnnPreprocessor;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.PermutePreprocessor;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.ReshapePreprocessor;
@ -26,7 +27,7 @@ import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class JsonTest {
public class JsonTest extends BaseDL4JTest {
@Test
public void testJsonPreprocessors() throws Exception {

View File

@ -20,6 +20,7 @@ import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test;
@ -36,7 +37,7 @@ import java.io.InputStream;
*/
@Slf4j
public class Keras1ModelConfigurationTest {
public class Keras1ModelConfigurationTest extends BaseDL4JTest {
private ClassLoader classLoader = getClass().getClassLoader();

View File

@ -21,6 +21,7 @@ import lombok.val;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
@ -49,7 +50,7 @@ import static org.junit.Assert.assertArrayEquals;
*/
@Slf4j
public class Keras2ModelConfigurationTest {
public class Keras2ModelConfigurationTest extends BaseDL4JTest {
ClassLoader classLoader = getClass().getClassLoader();

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.configurations;
import org.deeplearning4j.nn.conf.distribution.*;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -31,7 +32,7 @@ import java.util.Map;
import static org.junit.Assert.assertEquals;
public class KerasInitilizationTest {
public class KerasInitilizationTest extends BaseDL4JTest {
private double minValue = -0.2;
private double maxValue = 0.2;

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.configurations;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@ -38,10 +39,7 @@ import static org.junit.Assert.assertNotNull;
* Test import of Keras models.
*/
@Slf4j
public class KerasModelImportTest {
ClassLoader classLoader = KerasModelImportTest.class.getClassLoader();
public class KerasModelImportTest extends BaseDL4JTest {
@Test
public void testH5WithoutTensorflowScope() throws Exception {

View File

@ -20,6 +20,7 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.deeplearning4j.common.resources.DL4JResources;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
import org.deeplearning4j.nn.modelimport.keras.layers.custom.KerasLRN;
@ -41,7 +42,7 @@ import java.net.URL;
* @author Justin Long (crockpotveggies)
*/
@Slf4j
public class KerasCustomLayerTest {
public class KerasCustomLayerTest extends BaseDL4JTest {
@Rule
public TemporaryFolder testDir = new TemporaryFolder();

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.e2e;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLambdaLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel;
@ -44,7 +45,7 @@ import java.nio.file.StandardCopyOption;
*
* @author Max Pumperla
*/
public class KerasLambdaTest {
public class KerasLambdaTest extends BaseDL4JTest {
@Rule
public TemporaryFolder testDir = new TemporaryFolder();

View File

@ -32,10 +32,7 @@ import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.layers.recurrent.LSTM;
import org.deeplearning4j.nn.layers.recurrent.LastTimeStepLayer;
import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer;
import org.deeplearning4j.nn.modelimport.keras.Hdf5Archive;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel;
import org.deeplearning4j.nn.modelimport.keras.*;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelBuilder;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@ -78,7 +75,7 @@ import static org.junit.Assert.assertTrue;
* @author dave@skymind.io, Max Pumperla
*/
@Slf4j
public class KerasModelEndToEndTest {
public class KerasModelEndToEndTest extends BaseDL4JTest {
private static final String GROUP_ATTR_INPUTS = "inputs";
private static final String GROUP_ATTR_OUTPUTS = "outputs";
private static final String GROUP_PREDICTIONS = "predictions";

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.e2e;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
@ -50,7 +51,7 @@ import java.nio.file.StandardCopyOption;
* @author Max Pumperla
*/
@Slf4j
public class KerasYolo9000PredictTest {
public class KerasYolo9000PredictTest extends BaseDL4JTest {
private static final String DL4J_MODEL_FILE_NAME = ".";
private static ImagePreProcessingScaler IMAGE_PREPROCESSING_SCALER = new ImagePreProcessingScaler(0, 1);

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.e2e;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasSpaceToDepth;
@ -47,7 +48,7 @@ import java.nio.file.StandardCopyOption;
* @author Max Pumperla
*/
@Slf4j
public class KerasYolo9000Test {
public class KerasYolo9000Test extends BaseDL4JTest {
private static final String TEMP_MODEL_FILENAME = "tempModel";
private static final String H5_EXTENSION = ".h5";

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.advanced.activation;
import org.deeplearning4j.nn.conf.layers.ActivationLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasLeakyReLUTest {
public class KerasLeakyReLUTest extends BaseDL4JTest {
private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.advanced.activation;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.PReLULayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -35,7 +36,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasPReLUTest {
public class KerasPReLUTest extends BaseDL4JTest {
private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.advanced.activation;
import org.deeplearning4j.nn.conf.layers.ActivationLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasThresholdedReLUTest {
public class KerasThresholdedReLUTest extends BaseDL4JTest {
private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.Convolution1DLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -40,7 +41,7 @@ import static org.junit.Assert.assertNotNull;
/**
* @author Max Pumperla
*/
public class KerasAtrousConvolution1DTest {
public class KerasAtrousConvolution1DTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -38,7 +39,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasAtrousConvolution2DTest {
public class KerasAtrousConvolution2DTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.Convolution1DLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@ -37,7 +38,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasConvolution1DTest {
public class KerasConvolution1DTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@ -39,7 +40,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasConvolution2DTest {
public class KerasConvolution2DTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@ -43,7 +44,7 @@ import static org.junit.Assert.assertNotNull;
/**
* @author Max Pumperla
*/
public class KerasConvolution3DTest {
public class KerasConvolution3DTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -32,7 +33,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasCropping1DTest {
public class KerasCropping1DTest extends BaseDL4JTest {
private final String LAYER_NAME = "cropping_1D_layer";
private final int CROPPING = 2;

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.layers.convolutional.Cropping2D;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -32,7 +33,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasCropping2DTest {
public class KerasCropping2DTest extends BaseDL4JTest {
private final String LAYER_NAME = "cropping_2D_layer";
private final int[] CROPPING = new int[]{2, 3};

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.layers.convolutional.Cropping2D;
import org.deeplearning4j.nn.conf.layers.convolutional.Cropping3D;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -34,7 +35,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasCropping3DTest {
public class KerasCropping3DTest extends BaseDL4JTest {
private final String LAYER_NAME = "cropping_3D_layer";
private final int[] CROPPING = new int[]{2, 3, 5};

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.Deconvolution2D;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@ -39,7 +40,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasDeconvolution2DTest {
public class KerasDeconvolution2DTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.DepthwiseConvolution2D;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@ -42,7 +43,7 @@ import static org.junit.Assert.assertNotNull;
/**
* @author Max Pumperla
*/
public class KerasDepthwiseConvolution2DTest {
public class KerasDepthwiseConvolution2DTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.SeparableConvolution2D;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@ -39,7 +40,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasSeparableConvolution2DTest {
public class KerasSeparableConvolution2DTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.layers.Upsampling1D;
import org.deeplearning4j.nn.conf.layers.Upsampling2D;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -35,7 +36,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasUpsampling1DTest {
public class KerasUpsampling1DTest extends BaseDL4JTest {
private final String LAYER_NAME = "upsampling_1D_layer";
private int size = 4;

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.layers.Upsampling2D;
import org.deeplearning4j.nn.conf.layers.ZeroPadding1DLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -35,7 +36,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasUpsampling2DTest {
public class KerasUpsampling2DTest extends BaseDL4JTest {
private final String LAYER_NAME = "upsampling_2D_layer";
private int[] size = new int[]{2, 2};

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.layers.Upsampling3D;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -33,7 +34,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasUpsampling3DTest {
public class KerasUpsampling3DTest extends BaseDL4JTest {
private final String LAYER_NAME = "upsampling_3D_layer";
private int[] size = new int[]{2, 2, 2};

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.layers.ZeroPadding1DLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasZeroPadding1DTest {
public class KerasZeroPadding1DTest extends BaseDL4JTest {
private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
private Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.layers.ZeroPaddingLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -32,7 +33,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasZeroPadding2DTest {
public class KerasZeroPadding2DTest extends BaseDL4JTest {
private final String LAYER_NAME = "zero_padding_2D_layer";
private final int[] ZERO_PADDING = new int[]{2, 3};

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution;
import org.deeplearning4j.nn.conf.layers.ZeroPadding3DLayer;
import org.deeplearning4j.nn.conf.layers.ZeroPaddingLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -34,7 +35,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasZeroPadding3DTest {
public class KerasZeroPadding3DTest extends BaseDL4JTest {
private final String LAYER_NAME = "zero_padding_3D_layer";
private final int[] ZERO_PADDING = new int[]{2, 3, 4};

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.core;
import org.deeplearning4j.nn.conf.layers.ActivationLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -27,7 +28,7 @@ import java.util.Map;
import static org.junit.Assert.assertEquals;
public class KerasActivationLayer {
public class KerasActivationLayer extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.core;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@ -39,7 +40,7 @@ import static org.junit.Assert.assertNotNull;
/**
* @author Max Pumperla
*/
public class KerasDenseTest {
public class KerasDenseTest extends BaseDL4JTest {
private Integer keras1 = 1;
private Integer keras2 = 2;

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.core;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.DropoutLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasDropoutTest {
public class KerasDropoutTest extends BaseDL4JTest {
String LAYER_NAME = "dropout";
private final double DROPOUT_KERAS = 0.3;

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.core;
import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasMaskingTest {
public class KerasMaskingTest extends BaseDL4JTest {
private Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.core;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -33,7 +34,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasPermuteTest {
public class KerasPermuteTest extends BaseDL4JTest {
private Integer keras1 = 1;
private Integer keras2 = 2;

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.core;
import org.deeplearning4j.nn.conf.layers.misc.RepeatVector;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -30,7 +31,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasRepeatVectorTest {
public class KerasRepeatVectorTest extends BaseDL4JTest {
String LAYER_NAME = "repeat";
private int REPEAT = 4;

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.core;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -36,7 +37,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasReshapeTest {
public class KerasReshapeTest extends BaseDL4JTest {
private Integer keras1 = 1;
private Integer keras2 = 2;

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.core;
import org.deeplearning4j.nn.conf.dropout.SpatialDropout;
import org.deeplearning4j.nn.conf.layers.DropoutLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasSpatialDropout2DTest {
public class KerasSpatialDropout2DTest extends BaseDL4JTest {
String LAYER_NAME = "spatial_dropout_2d";
private final double RATE_KERAS = 0.3;

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.embeddings;
import org.deeplearning4j.nn.conf.layers.EmbeddingSequenceLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -36,7 +37,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasEmbeddingTest {
public class KerasEmbeddingTest extends BaseDL4JTest {
private final String LAYER_NAME = "embedding_sequence_layer";
private final String INIT_KERAS = "glorot_normal";

View File

@ -21,6 +21,7 @@ import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.LocallyConnected1D;
import org.deeplearning4j.nn.conf.layers.LocallyConnected2D;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@ -39,7 +40,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasLocallyConnected1DTest {
public class KerasLocallyConnected1DTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -20,6 +20,7 @@ import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.LocallyConnected2D;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@ -39,7 +40,7 @@ import static org.junit.Assert.assertNotNull;
/**
* @author Max Pumperla
*/
public class KerasLocallyConnected2DTest {
public class KerasLocallyConnected2DTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.noise;
import org.deeplearning4j.nn.conf.dropout.AlphaDropout;
import org.deeplearning4j.nn.conf.layers.DropoutLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasAlphaDropoutTest {
public class KerasAlphaDropoutTest extends BaseDL4JTest {
String LAYER_NAME = "alpha_dropout";
private final double RATE_KERAS = 0.3;

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.noise;
import org.deeplearning4j.nn.conf.dropout.GaussianDropout;
import org.deeplearning4j.nn.conf.layers.DropoutLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasGaussianDropoutTest {
public class KerasGaussianDropoutTest extends BaseDL4JTest {
String LAYER_NAME = "gaussian_dropout";
private final double RATE_KERAS = 0.3;

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.noise;
import org.deeplearning4j.nn.conf.dropout.GaussianNoise;
import org.deeplearning4j.nn.conf.layers.DropoutLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasGaussianNoiseTest {
public class KerasGaussianNoiseTest extends BaseDL4JTest {
String LAYER_NAME = "gaussian_noise";
private final double STDDEV = 0.3;

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.normalization;
import org.deeplearning4j.nn.conf.layers.BatchNormalization;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -32,7 +33,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasBatchNormalizationTest {
public class KerasBatchNormalizationTest extends BaseDL4JTest {
public static final String PARAM_NAME_BETA = "beta";
private final String LAYER_NAME = "batch_norm_layer";

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.pooling;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.conf.layers.Subsampling1DLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -33,7 +34,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasPooling1DTest {
public class KerasPooling1DTest extends BaseDL4JTest {
private final String LAYER_NAME = "test_layer";
private final int[] KERNEL_SIZE = new int[]{2};

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.pooling;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -35,7 +36,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasPooling2DTest {
public class KerasPooling2DTest extends BaseDL4JTest {
private final String LAYER_NAME = "test_layer";
private final int[] KERNEL_SIZE = new int[]{1, 2};

View File

@ -20,6 +20,7 @@ import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.conf.layers.Subsampling3DLayer;
import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -36,7 +37,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasPooling3DTest {
public class KerasPooling3DTest extends BaseDL4JTest {
private final String LAYER_NAME = "pooling_3d";
private final int[] KERNEL_SIZE = new int[]{2, 2, 2};

View File

@ -21,6 +21,7 @@ import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep;
import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@ -44,7 +45,7 @@ import static org.junit.Assert.assertNotNull;
/**
* @author Max Pumperla
*/
public class KerasLSTMTest {
public class KerasLSTMTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.recurrent;
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@ -35,7 +36,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasSimpleRnnTest {
public class KerasSimpleRnnTest extends BaseDL4JTest {
private final String ACTIVATION = "sigmoid";
private final String LAYER_NAME = "simple_rnn_layer";

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.wrappers;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
@ -33,7 +34,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Max Pumperla
*/
public class KerasBidirectionalTest {
public class KerasBidirectionalTest extends BaseDL4JTest {
private final String ACTIVATION_KERAS = "linear";
private final String ACTIVATION_DL4J = "identity";

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.optimizers;
import org.deeplearning4j.config.DL4JSystemProperties;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel;
import org.deeplearning4j.nn.modelimport.keras.e2e.KerasModelEndToEndTest;
@ -33,7 +34,7 @@ import java.nio.file.StandardCopyOption;
import static java.io.File.createTempFile;
public class OptimizerImport {
public class OptimizerImport extends BaseDL4JTest {
@Test
public void importAdam() throws Exception {

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.nn.modelimport.keras.preprocessing.sequence;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.preprocessing.text.KerasTokenizer;
import org.junit.Test;
@ -29,7 +30,7 @@ import java.io.IOException;
*
* @author Max Pumperla
*/
public class TimeSeriesGeneratorImportTest {
public class TimeSeriesGeneratorImportTest extends BaseDL4JTest {
@Test
public void importTimeSeriesTest() throws IOException, InvalidKerasConfigurationException {

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.nn.modelimport.keras.preprocessing.sequence;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
@ -24,7 +25,7 @@ import org.nd4j.linalg.primitives.Pair;
import static org.junit.Assert.assertEquals;
public class TimeSeriesGeneratorTest {
public class TimeSeriesGeneratorTest extends BaseDL4JTest {
@Test
public void tsGeneratorTest() throws InvalidKerasConfigurationException {

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.nn.modelimport.keras.preprocessing.text;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.junit.Test;
import org.nd4j.linalg.io.ClassPathResource;
@ -33,7 +34,7 @@ import static org.junit.Assert.assertTrue;
*
* @author Max Pumperla
*/
public class TokenizerImportTest {
public class TokenizerImportTest extends BaseDL4JTest {
ClassLoader classLoader = getClass().getClassLoader();

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.nn.modelimport.keras.preprocessing.text;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
@ -31,7 +32,7 @@ import static org.junit.Assert.assertEquals;
*
* @author Max Pumperla
*/
public class TokenizerTest {
public class TokenizerTest extends BaseDL4JTest {
@Test
public void tokenizerBasics() {

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.modelimport.keras.weights;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasModel;
import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasSpaceToDepth;
@ -42,7 +43,7 @@ import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
@Slf4j
public class KerasWeightSettingTests {
public class KerasWeightSettingTests extends BaseDL4JTest {
@Rule
public final TemporaryFolder testDir = new TemporaryFolder();

View File

@ -0,0 +1,140 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.nearestneighbor.server;
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TestName;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.profiler.ProfilerConfig;
import java.lang.management.ManagementFactory;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@Slf4j
public class BaseDL4JTest {
@Rule
public TestName name = new TestName();
protected long startTime;
protected int threadCountBefore;
/**
* Override this to set the profiling mode for the tests defined in the child class
*/
public OpExecutioner.ProfilingMode getProfilingMode(){
return OpExecutioner.ProfilingMode.SCOPE_PANIC;
}
/**
* Override this to set the datatype of the tests defined in the child class
*/
public DataType getDataType(){
return DataType.DOUBLE;
}
public DataType getDefaultFPDataType(){
return getDataType();
}
@Before
public void beforeTest(){
log.info("{}.{}", getClass().getSimpleName(), name.getMethodName());
Nd4j.getExecutioner().setProfilingMode(getProfilingMode());
Nd4j.getExecutioner().setProfilingConfig(ProfilerConfig.builder().build());
Nd4j.setDefaultDataTypes(getDataType(), getDefaultFPDataType());
startTime = System.currentTimeMillis();
threadCountBefore = ManagementFactory.getThreadMXBean().getThreadCount();
}
@After
public void afterTest(){
//Attempt to keep workspaces isolated between tests
Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
MemoryWorkspace currWS = Nd4j.getMemoryManager().getCurrentWorkspace();
Nd4j.getMemoryManager().setCurrentWorkspace(null);
if(currWS != null){
//Not really safe to continue testing under this situation... other tests will likely fail with obscure
// errors that are hard to track back to this
log.error("Open workspace leaked from test! Exiting - {}, isOpen = {} - {}", currWS.getId(), currWS.isScopeActive(), currWS);
System.exit(1);
}
StringBuilder sb = new StringBuilder();
long maxPhys = Pointer.maxPhysicalBytes();
long maxBytes = Pointer.maxBytes();
long currPhys = Pointer.physicalBytes();
long currBytes = Pointer.totalBytes();
long jvmTotal = Runtime.getRuntime().totalMemory();
long jvmMax = Runtime.getRuntime().maxMemory();
int threadsAfter = ManagementFactory.getThreadMXBean().getThreadCount();
long duration = System.currentTimeMillis() - startTime;
sb.append(getClass().getSimpleName()).append(".").append(name.getMethodName())
.append(": ").append(duration).append(" ms")
.append(", threadCount: (").append(threadCountBefore).append("->").append(threadsAfter).append(")")
.append(", jvmTotal=").append(jvmTotal)
.append(", jvmMax=").append(jvmMax)
.append(", totalBytes=").append(currBytes).append(", maxBytes=").append(maxBytes)
.append(", currPhys=").append(currPhys).append(", maxPhys=").append(maxPhys);
List<MemoryWorkspace> ws = Nd4j.getWorkspaceManager().getAllWorkspacesForCurrentThread();
if(ws != null && ws.size() > 0){
long currSize = 0;
for(MemoryWorkspace w : ws){
currSize += w.getCurrentSize();
}
if(currSize > 0){
sb.append(", threadWSSize=").append(currSize)
.append(" (").append(ws.size()).append(" WSs)");
}
}
Properties p = Nd4j.getExecutioner().getEnvironmentInformation();
Object o = p.get("cuda.devicesInformation");
if(o instanceof List){
List<Map<String,Object>> l = (List<Map<String, Object>>) o;
if(l.size() > 0) {
sb.append(" [").append(l.size())
.append(" GPUs: ");
for (int i = 0; i < l.size(); i++) {
Map<String,Object> m = l.get(i);
if(i > 0)
sb.append(",");
sb.append("(").append(m.get("cuda.freeMemory")).append(" free, ")
.append(m.get("cuda.totalMemory")).append(" total)");
}
sb.append("]");
}
}
log.info(sb.toString());
}
}
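
Not part of the diff itself, but for orientation: a minimal sketch of how the test classes changed above consume this base class. The class and test names below are hypothetical; subclasses inherit the per-test data-type and profiling setup from beforeTest() plus the workspace-leak and memory/timing checks from afterTest(), and can override getDataType() where a suite needs non-default precision.

import org.junit.Test;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import static org.junit.Assert.assertEquals;

public class ExampleMigratedTest extends BaseDL4JTest {

    // Optional override: run this class's tests in single precision instead of the DOUBLE default
    @Override
    public DataType getDataType() {
        return DataType.FLOAT;
    }

    @Test
    public void testSomething() {
        // beforeTest() has already applied the default data types and profiling mode;
        // afterTest() will later check that no workspace was left open and log timing/memory stats
        INDArray arr = Nd4j.rand(DataType.FLOAT, new int[]{3, 3});
        assertEquals(9, arr.length());
    }
}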

View File

@ -44,7 +44,7 @@ import static org.junit.Assert.assertEquals;
/**
* Created by agibsonccc on 4/27/17.
*/
public class NearestNeighborTest {
public class NearestNeighborTest extends BaseDL4JTest {
@Rule
public TemporaryFolder testDir = new TemporaryFolder();

View File

@ -175,7 +175,7 @@ public class KDTree implements Serializable {
return Pair.of(Double.POSITIVE_INFINITY, null);
int _discNext = (_disc + 1) % dims;
double dist2 = Nd4j.getExecutioner().execAndReturn(new EuclideanDistance(point, Nd4j.zeros(point.shape()))).getFinalResult().doubleValue();
double dist2 = Nd4j.getExecutioner().execAndReturn(new EuclideanDistance(point, Nd4j.zeros(point.dataType(), point.shape()))).getFinalResult().doubleValue();
if (dist2 < dist) {
best = node.getPoint();
dist = dist2;
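
The one-line KDTree change above creates the zeros array via Nd4j.zeros(point.dataType(), point.shape()), so the EuclideanDistance reduce op receives two buffers of the same data type rather than a zeros array of the global default type. A minimal, self-contained sketch of the pattern follows; the class name is hypothetical and the import locations are assumptions that may vary between ND4J versions.

import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.reduce3.EuclideanDistance;
import org.nd4j.linalg.factory.Nd4j;

public class DistanceToOriginExample {
    public static void main(String[] args) {
        INDArray point = Nd4j.rand(DataType.FLOAT, new int[]{1, 3});
        // Build the origin with the same data type as 'point'; mixing e.g. FLOAT and
        // DOUBLE operands in the distance op is the mismatch the fix above avoids
        INDArray origin = Nd4j.zeros(point.dataType(), point.shape());
        double dist = Nd4j.getExecutioner()
                .execAndReturn(new EuclideanDistance(point, origin))
                .getFinalResult().doubleValue();
        System.out.println("Euclidean distance to origin: " + dist);
    }
}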

View File

@ -0,0 +1,140 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.clustering;
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacpp.Pointer;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TestName;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.profiler.ProfilerConfig;
import java.lang.management.ManagementFactory;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@Slf4j
public class BaseDL4JTest {
@Rule
public TestName name = new TestName();
protected long startTime;
protected int threadCountBefore;
/**
* Override this to set the profiling mode for the tests defined in the child class
*/
public OpExecutioner.ProfilingMode getProfilingMode(){
return OpExecutioner.ProfilingMode.SCOPE_PANIC;
}
/**
* Override this to set the datatype of the tests defined in the child class
*/
public DataType getDataType(){
return DataType.DOUBLE;
}
public DataType getDefaultFPDataType(){
return getDataType();
}
@Before
public void beforeTest(){
log.info("{}.{}", getClass().getSimpleName(), name.getMethodName());
Nd4j.getExecutioner().setProfilingMode(getProfilingMode());
Nd4j.getExecutioner().setProfilingConfig(ProfilerConfig.builder().build());
Nd4j.setDefaultDataTypes(getDataType(), getDefaultFPDataType());
startTime = System.currentTimeMillis();
threadCountBefore = ManagementFactory.getThreadMXBean().getThreadCount();
}
@After
public void afterTest(){
//Attempt to keep workspaces isolated between tests
Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
MemoryWorkspace currWS = Nd4j.getMemoryManager().getCurrentWorkspace();
Nd4j.getMemoryManager().setCurrentWorkspace(null);
if(currWS != null){
//Not really safe to continue testing under this situation... other tests will likely fail with obscure
// errors that are hard to track back to this
log.error("Open workspace leaked from test! Exiting - {}, isOpen = {} - {}", currWS.getId(), currWS.isScopeActive(), currWS);
System.exit(1);
}
StringBuilder sb = new StringBuilder();
long maxPhys = Pointer.maxPhysicalBytes();
long maxBytes = Pointer.maxBytes();
long currPhys = Pointer.physicalBytes();
long currBytes = Pointer.totalBytes();
long jvmTotal = Runtime.getRuntime().totalMemory();
long jvmMax = Runtime.getRuntime().maxMemory();
int threadsAfter = ManagementFactory.getThreadMXBean().getThreadCount();
long duration = System.currentTimeMillis() - startTime;
sb.append(getClass().getSimpleName()).append(".").append(name.getMethodName())
.append(": ").append(duration).append(" ms")
.append(", threadCount: (").append(threadCountBefore).append("->").append(threadsAfter).append(")")
.append(", jvmTotal=").append(jvmTotal)
.append(", jvmMax=").append(jvmMax)
.append(", totalBytes=").append(currBytes).append(", maxBytes=").append(maxBytes)
.append(", currPhys=").append(currPhys).append(", maxPhys=").append(maxPhys);
List<MemoryWorkspace> ws = Nd4j.getWorkspaceManager().getAllWorkspacesForCurrentThread();
if(ws != null && ws.size() > 0){
long currSize = 0;
for(MemoryWorkspace w : ws){
currSize += w.getCurrentSize();
}
if(currSize > 0){
sb.append(", threadWSSize=").append(currSize)
.append(" (").append(ws.size()).append(" WSs)");
}
}
Properties p = Nd4j.getExecutioner().getEnvironmentInformation();
Object o = p.get("cuda.devicesInformation");
if(o instanceof List){
List<Map<String,Object>> l = (List<Map<String, Object>>) o;
if(l.size() > 0) {
sb.append(" [").append(l.size())
.append(" GPUs: ");
for (int i = 0; i < l.size(); i++) {
Map<String,Object> m = l.get(i);
if(i > 0)
sb.append(",");
sb.append("(").append(m.get("cuda.freeMemory")).append(" free, ")
.append(m.get("cuda.totalMemory")).append(" total)");
}
sb.append("]");
}
}
log.info(sb.toString());
}
}
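
This second copy of BaseDL4JTest, added under org.deeplearning4j.clustering for the clustering-module tests below, exposes the same hooks. As another hypothetical sketch, a subclass can also override getProfilingMode(), for example to skip the SCOPE_PANIC checks in a long-running test class, assuming OpExecutioner.ProfilingMode.DISABLED is available in the ND4J version in use.

import org.nd4j.linalg.api.ops.executioner.OpExecutioner;

public class LongRunningClusteringTest extends BaseDL4JTest {

    // Hypothetical override: disable scope-panic profiling for this class's tests
    @Override
    public OpExecutioner.ProfilingMode getProfilingMode() {
        return OpExecutioner.ProfilingMode.DISABLED;
    }
}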

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.clustering.kdtree;
import com.google.common.primitives.Doubles;
import lombok.val;
import org.deeplearning4j.clustering.BaseDL4JTest;
import org.joda.time.Duration;
import org.junit.Before;
import org.junit.BeforeClass;
@ -40,7 +41,7 @@ import static org.junit.Assert.assertTrue;
/**
* Created by agibsonccc on 1/1/15.
*/
public class KDTreeTest {
public class KDTreeTest extends BaseDL4JTest {
private KDTree kdTree;

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.clustering.kmeans;
import org.apache.commons.lang3.time.StopWatch;
import org.deeplearning4j.clustering.BaseDL4JTest;
import org.deeplearning4j.clustering.algorithm.Distance;
import org.deeplearning4j.clustering.cluster.*;
import org.junit.Ignore;
@ -33,7 +34,7 @@ import static org.junit.Assert.fail;
/**
* Created by agibsonccc on 7/2/17.
*/
public class KMeansTest {
public class KMeansTest extends BaseDL4JTest {
@Test
public void testKMeans() {

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.clustering.lsh;
import org.deeplearning4j.clustering.BaseDL4JTest;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
@ -31,7 +32,7 @@ import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class RandomProjectionLSHTest {
public class RandomProjectionLSHTest extends BaseDL4JTest {
int hashLength = 31;
int numTables = 2;

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.clustering.quadtree;
import org.deeplearning4j.clustering.BaseDL4JTest;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
@ -26,7 +27,7 @@ import static org.junit.Assert.assertTrue;
/**
* Created by agibsonccc on 1/2/15.
*/
public class QuadTreeTest {
public class QuadTreeTest extends BaseDL4JTest {
@Test
public void testQuadTree() {

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.clustering.randomprojection;
import org.deeplearning4j.clustering.BaseDL4JTest;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.junit.Before;
import org.junit.Test;
@ -31,7 +32,7 @@ import java.util.List;
import static org.junit.Assert.*;
public class RPTreeTest {
public class RPTreeTest extends BaseDL4JTest {
@Before
public void setUp() {

View File

@ -16,13 +16,14 @@
package org.deeplearning4j.clustering.randomprojection;
import org.deeplearning4j.clustering.BaseDL4JTest;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import static org.junit.Assert.assertEquals;
public class RPUtilsTest {
public class RPUtilsTest extends BaseDL4JTest {
@Test
public void testDistanceComputeBatch() {

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.clustering.sptree;
import com.google.common.util.concurrent.AtomicDouble;
import org.apache.commons.lang3.time.StopWatch;
import org.deeplearning4j.clustering.BaseDL4JTest;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
@ -33,7 +34,7 @@ import static org.junit.Assert.*;
/**
* @author Adam Gibson
*/
public class SPTreeTest {
public class SPTreeTest extends BaseDL4JTest {
@Before
public void setUp() {

Some files were not shown because too many files have changed in this diff.