/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.gradientcheck;

import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.variational.*;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.impl.ActivationTanH;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.random.impl.BernoulliDistribution;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
import org.nd4j.linalg.lossfunctions.impl.LossMSE;
import java.util.Arrays;

import static org.junit.Assert.assertTrue;

/**
 * Gradient checks for the {@link VariationalAutoencoder} layer: supervised use as an MLP,
 * layerwise pretraining, each reconstruction distribution, and multiple samples of the
 * latent variable z.
 *
 * @author Alex Black
 */
public class VaeGradientCheckTests extends BaseDL4JTest {
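    // Gradient check settings: DEFAULT_EPS is the step for the central finite-difference
    // estimate; a parameter passes if its relative error vs. the backprop gradient is below
    // DEFAULT_MAX_REL_ERROR, or its absolute error is below DEFAULT_MIN_ABS_ERROR (which
    // avoids relative-error blowup when both gradients are close to zero).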
private static final boolean PRINT_RESULTS = true;
private static final boolean RETURN_ON_FIRST_FAILURE = false;
private static final double DEFAULT_EPS = 1e-6;
private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
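
    // Gradient checks need double precision; set ND4J's default data type accordingly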
static {
Nd4j.setDataType(DataType.DOUBLE);
    }

@Test
public void testVaeAsMLP() {
        //Post pre-training: a VAE can be used as an MLP, by taking the mean value from p(z|x) as the output
        //This test checks the gradients for that use case
Activation[] activFns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH};
LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MCXENT, LossFunction.MSE, LossFunction.MSE, LossFunction.MCXENT, LossFunction.MSE};
Activation[] outputActivations = {Activation.SOFTMAX, Activation.SOFTMAX, Activation.TANH, Activation.TANH, Activation.SOFTMAX, Activation.TANH};
//use l2vals[i] with l1vals[i]
double[] l2vals = {0.4, 0.0, 0.4, 0.4, 0.0, 0.0};
double[] l1vals = {0.0, 0.0, 0.5, 0.0, 0.0, 0.5};
double[] biasL2 = {0.0, 0.0, 0.0, 0.2, 0.0, 0.4};
double[] biasL1 = {0.0, 0.0, 0.6, 0.0, 0.0, 0.0};
int[][] encoderLayerSizes = new int[][] {{5}, {5}, {5, 6}, {5, 6}, {5}, {5, 6}};
int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}, {6}, {7, 8}, {6}, {7, 8}};
int[] minibatches = new int[]{1,5,4,3,1,4};
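        // All arrays above are index-aligned: iteration i of the loop below tests one
        // configuration drawn from position i of each array.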
Nd4j.getRandom().setSeed(12345);
        for (int i = 0; i < activFns.length; i++) {
LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
double l2 = l2vals[i];
double l1 = l1vals[i];
int[] encoderSizes = encoderLayerSizes[i];
int[] decoderSizes = decoderLayerSizes[i];
int minibatch = minibatches[i];
INDArray input = Nd4j.rand(minibatch, 4);
INDArray labels = Nd4j.create(minibatch, 3);
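            // One-hot labels: example j is assigned class (j % 3)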
for (int j = 0; j < minibatch; j++) {
labels.putScalar(j, j % 3, 1.0);
}
Activation afn = activFns[i];
MultiLayerConfiguration conf =
new NeuralNetConfiguration.Builder().l2(l2).l1(l1)
.dataType(DataType.DOUBLE)
                            .updater(new NoOp())
                            .l2Bias(biasL2[i]).l1Bias(biasL1[i])
                            .seed(12345L).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(4)
.nOut(3).encoderLayerSizes(encoderSizes)
.decoderLayerSizes(decoderSizes)
.dist(new NormalDistribution(0, 1))
.activation(afn)
.build())
.layer(1, new OutputLayer.Builder(lf)
.activation(outputActivation).nIn(3).nOut(3)
.dist(new NormalDistribution(0, 1))
.build())
.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf
+ ", outputActivation=" + outputActivation + ", encLayerSizes = "
+ Arrays.toString(encoderSizes) + ", decLayerSizes = "
+ Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
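            // Numerically verify each parameter: perturb it by +/- DEFAULT_EPS, recompute
            // the score, and compare the central-difference estimate to the backprop gradient.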
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input,
labels);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(mln);
}
    }

@Test
public void testVaePretrain() {
Activation[] activFns = {Activation.IDENTITY, Activation.TANH, Activation.SOFTSIGN};
Activation[] pzxAfns = {Activation.IDENTITY, Activation.IDENTITY, Activation.TANH};
Activation[] pxzAfns = {Activation.TANH, Activation.TANH, Activation.IDENTITY};
//use l2vals[i] with l1vals[i]
double[] l2vals = {0.0, 0.4, 0.4};
double[] l1vals = {0.0, 0.5, 0.0};
double[] biasL2 = {0.0, 0.0, 0.2};
double[] biasL1 = {0.0, 0.6, 0.0};
int[][] encoderLayerSizes = new int[][] {{5}, {3, 4}, {3, 4}};
int[][] decoderLayerSizes = new int[][] {{4}, {2}, {4, 3}};
        int[] minibatches = new int[]{1, 3, 2};
Nd4j.getRandom().setSeed(12345);
        for (int i = 0; i < activFns.length; i++) {
double l2 = l2vals[i];
double l1 = l1vals[i];
int[] encoderSizes = encoderLayerSizes[i];
int[] decoderSizes = decoderLayerSizes[i];
int minibatch = minibatches[i];
INDArray input = Nd4j.rand(minibatch, 4);
INDArray labels = Nd4j.create(minibatch, 3);
for (int j = 0; j < minibatch; j++) {
labels.putScalar(j, j % 3, 1.0);
}
Activation afn = activFns[i];
Activation pzxAfn = pzxAfns[i];
Activation pxzAfn = pxzAfns[i];
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(l2)
.dataType(DataType.DOUBLE)
.l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i]).updater(new NoOp())
.seed(12345L).weightInit(WeightInit.XAVIER).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3)
.encoderLayerSizes(encoderSizes).decoderLayerSizes(decoderSizes)
.pzxActivationFunction(pzxAfn)
.reconstructionDistribution(
new GaussianReconstructionDistribution(pxzAfn))
.activation(afn).build())
.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
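            // The pretrain check operates on the layer directly (no fit() call), so the
            // network's flattened gradients view array must be initialized explicitly first.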
mln.initGradientsView();
org.deeplearning4j.nn.api.Layer layer = mln.getLayer(0);
String msg = "testVaePretrain() - activationFn=" + afn + ", p(z|x) afn = " + pzxAfn
+ ", p(x|z) afn = " + pxzAfn + ", encLayerSizes = " + Arrays.toString(encoderSizes)
+ ", decLayerSizes = " + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1="
+ l1;
if (PRINT_RESULTS) {
System.out.println(msg);
for (int l = 0; l < mln.getnLayers(); l++)
System.out.println("Layer " + l + " # params: " + mln.getLayer(l).numParams());
}
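            // Check gradients of the unsupervised (variational) objective; the fixed RNG
            // seed (12345) makes the sampling of z reproducible across forward passes.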
boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE, input, 12345);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(mln);
}
    }

@Test
public void testVaePretrainReconstructionDistributions() {
int inOutSize = 3;
ReconstructionDistribution[] reconstructionDistributions =
new ReconstructionDistribution[]{new GaussianReconstructionDistribution(Activation.IDENTITY),
new GaussianReconstructionDistribution(Activation.TANH),
new BernoulliReconstructionDistribution(Activation.SIGMOID),
new CompositeReconstructionDistribution.Builder()
.addDistribution(1,
new GaussianReconstructionDistribution(
Activation.IDENTITY))
.addDistribution(1, new BernoulliReconstructionDistribution())
.addDistribution(1,
new GaussianReconstructionDistribution(
Activation.TANH))
.build(),
new ExponentialReconstructionDistribution(Activation.TANH),
new LossFunctionWrapper(new ActivationTanH(), new LossMSE())};
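        // Each distribution needs input data matching its support: real-valued for Gaussian,
        // binary {0,1} for Bernoulli, and a column-wise mix of both for the composite case,
        // hence the per-distribution data generation in the switch below.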
Nd4j.getRandom().setSeed(12345);
for (int i = 0; i < reconstructionDistributions.length; i++) {
int minibatch = (i % 2 == 0 ? 1 : 3);
INDArray data;
switch (i) {
case 0: //Gaussian + identity
case 1: //Gaussian + tanh
data = Nd4j.rand(minibatch, inOutSize);
break;
case 2: //Bernoulli
data = Nd4j.create(minibatch, inOutSize);
Nd4j.getExecutioner().exec(new BernoulliDistribution(data, 0.5), Nd4j.getRandom());
break;
case 3: //Composite
data = Nd4j.create(minibatch, inOutSize);
data.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 1)).assign(Nd4j.rand(minibatch, 1));
Nd4j.getExecutioner()
.exec(new BernoulliDistribution(
data.get(NDArrayIndex.all(), NDArrayIndex.interval(1, 2)), 0.5),
Nd4j.getRandom());
data.get(NDArrayIndex.all(), NDArrayIndex.interval(2, 3)).assign(Nd4j.rand(minibatch, 1));
break;
case 4:
case 5:
data = Nd4j.rand(minibatch, inOutSize);
break;
default:
                    throw new RuntimeException("Invalid case: " + i);
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.seed(12345L).dist(new NormalDistribution(0, 1))
.list().layer(0,
new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3)
.encoderLayerSizes(4).decoderLayerSizes(3)
.pzxActivationFunction(Activation.TANH)
.reconstructionDistribution(
reconstructionDistributions[i])
.activation(Activation.TANH)
.build())
.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
mln.initGradientsView();
org.deeplearning4j.nn.api.Layer layer = mln.getLayer(0);
String msg = "testVaePretrainReconstructionDistributions() - " + reconstructionDistributions[i];
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
data, 12345);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(mln);
}
    }

@Test
public void testVaePretrainMultipleSamples() {
int minibatch = 2;
Nd4j.getRandom().setSeed(12345);
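        // numSamples is the number of Monte Carlo samples of the latent variable z drawn per
        // example when estimating the reconstruction log-likelihood; gradients should be
        // correct for both the single-sample and multi-sample cases.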
for (int numSamples : new int[]{1, 2}) {
INDArray features = Nd4j.rand(DataType.DOUBLE, minibatch, 4);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3)
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.seed(12345L).weightInit(WeightInit.XAVIER).list()
.layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(2, 3)
.decoderLayerSizes(4, 3).pzxActivationFunction(Activation.TANH)
.reconstructionDistribution(
new GaussianReconstructionDistribution(Activation.TANH))
.numSamples(numSamples).activation(Activation.TANH)
.build())
.build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
mln.initGradientsView();
org.deeplearning4j.nn.api.Layer layer = mln.getLayer(0);
String msg = "testVaePretrainMultipleSamples() - numSamples = " + numSamples;
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < mln.getnLayers(); j++)
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
features, 12345);
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(mln);
}
}
}