From 9af4f9f23a6577b73dd31018a06ad80d237df607 Mon Sep 17 00:00:00 2001
From: brian
Date: Fri, 24 Mar 2023 15:04:06 +0100
Subject: [PATCH] Playing with some new code

Introduce the new net.brutex.ai.dnn API surface: ILayer, ILayerConfiguration,
IModel, INeuralNetwork and INeuralNetworkConfiguration, together with
DenseLayerConfiguration, FeedForwardLayerConfiguration and
ArtificialNeuralNetwork. The earlier AbstractNeuralNetwork and NeuralNetwork
prototypes and the BuildingBlockLayer wrapper are removed.

Callers of ComputationGraph#getConfiguration() now use
getComputationGraphConfiguration(), test log messages refer to "ILayer"
instead of "Layer", the GAN example in brutex-extended-tests is reworked to
use an explicit Adam updater, WeightInitXavier and ActivationIdentity, and
the tests dnnTest and FFLayerTest are added.

Signed-off-by: brian
---
 .../src/test/java/net/brutex/gan/App.java | 65 +-
 .../test/java/net/brutex/spark/BrianTest.java | 2 +-
 .../java/net/brutex/spark/BrianTest2.java | 2 +-
 .../java/net/brutex/spark/TestServer.java | 2 +-
 .../java/net/brutex/spark/TestServer2.java | 2 +-
 .../IntegrationTestBaselineGenerator.java | 4 +-
 .../integration/IntegrationTestRunner.java | 28 +-
 .../deeplearning4j/integration/TestUtils.java | 4 +-
 .../java/org/deeplearning4j/TestUtils.java | 4 +-
 .../org/deeplearning4j/eval/EvalTest.java | 2 +-
 .../gradientcheck/BNGradientCheckTest.java | 14 +-
 .../gradientcheck/CNN1DGradientCheckTest.java | 8 +-
 .../gradientcheck/CNN3DGradientCheckTest.java | 8 +-
 .../gradientcheck/CNNGradientCheckTest.java | 14 +-
 .../GlobalPoolingGradientCheckTests.java | 8 +-
 .../gradientcheck/GradientCheckTests.java | 16 +-
 .../GradientCheckTestsComputationGraph.java | 50 +-
 .../gradientcheck/LRNGradientCheckTests.java | 2 +-
 .../gradientcheck/LSTMGradientCheckTests.java | 6 +-
 .../NoBiasGradientCheckTests.java | 2 +-
 .../OutputLayerGradientChecks.java | 6 +-
 .../gradientcheck/VaeGradientCheckTests.java | 8 +-
 .../nn/conf/layers/LayerConfigTest.java | 36 +-
 .../deeplearning4j/nn/dtypes/DTypeTests.java | 4 +-
 .../nn/graph/ComputationGraphTestRNN.java | 10 +-
 .../nn/graph/TestCompGraphUnsupervised.java | 5 +-
 .../nn/graph/TestComputationGraphNetwork.java | 28 +-
 .../nn/layers/FrozenLayerTest.java | 2 +-
 .../deeplearning4j/nn/layers/TestDropout.java | 2 +-
 .../embedding/EmbeddingLayerTest.java | 4 +-
 .../nn/layers/ocnn/OCNNOutputLayerTest.java | 2 +-
 .../samediff/testlayers/SameDiffDense.java | 2 +-
 .../testlayers/SameDiffDenseVertex.java | 4 +-
 .../nn/misc/WorkspaceTests.java | 8 +-
 .../nn/multilayer/MultiLayerTest.java | 2 +-
 .../nn/multilayer/MultiLayerTestRNN.java | 2 +-
 .../rl/TestMultiModelGradientApplication.java | 4 +-
 .../TestTransferLearningModelSerializer.java | 2 +-
 .../TransferLearningCompGraphTest.java | 6 +-
 .../TransferLearningHelperTest.java | 2 +-
 .../optimize/solver/TestOptimizers.java | 4 +-
 .../regressiontest/RegressionTest060.java | 2 +-
 .../regressiontest/RegressionTest071.java | 2 +-
 .../regressiontest/RegressionTest080.java | 2 +-
 .../regressiontest/RegressionTest100a.java | 2 +-
 .../regressiontest/RegressionTest100b3.java | 2 +-
 .../regressiontest/RegressionTest100b4.java | 2 +-
 .../regressiontest/RegressionTest100b6.java | 2 +-
 .../customlayer100a/CustomLayer.java | 6 +-
 .../util/CrashReportingUtilTest.java | 12 +-
 .../util/ModelSerializerTest.java | 4 +-
 .../cuda/recurrent/CudnnLSTMHelper.java | 2 +-
 .../nn/modelimport/keras/KerasLayer.java | 8 +-
 .../nn/modelimport/keras/KerasModel.java | 4 +-
 .../keras/config/KerasLayerConfiguration.java | 2 +-
 .../keras/layers/core/KerasDense.java | 4 +-
 .../keras/layers/recurrent/KerasLSTM.java | 4 +-
 .../layers/recurrent/KerasSimpleRnn.java | 2 +-
 .../layers/wrappers/KerasBidirectional.java | 4 +-
 .../configurations/FullModelComparisons.java | 4 +-
 .../brutex/ai/dnn/api/LayerConfiguration.java | 9 +
 cavis-dnn/cavis-dnn-nn/build.gradle | 6 +-
 .../ILayer.java} | 22 +-
 .../ILayerConfiguration.java} | 57 +-
 .../java/net/brutex/ai/dnn/api/IModel.java | 86 +
 .../brutex/ai/dnn/api/INeuralNetwork.java} | 53 +-
 .../dnn/api/INeuralNetworkConfiguration.java | 52 +
 .../dnn/conf/NeuralNetworkConfiguration.java | 708 +-
 .../layer/AbstractLayerConfiguration.java | 10 +-
 .../conf/layer/DenseLayerConfiguration.java | 62 +
 .../layer/FeedForwardLayerConfiguration.java | 99 +
 .../impl/network/AbstractNeuralNetwork.java | 72 -
 .../ai/dnn/impl/network/NeuralNetwork.java | 692 --
 .../dnn/networks/ArtificialNeuralNetwork.java | 53 +
 .../trainer/BaseEarlyStoppingTrainer.java | 2 +-
 .../gradientcheck/GradientCheckUtil.java | 6 +-
 .../java/org/deeplearning4j/nn/api/Layer.java | 377 +-
 .../deeplearning4j/nn/api/ModelAdapter.java | 2 +-
 .../nn/api/ParamInitializer.java | 10 +-
 .../deeplearning4j/nn/api/TrainingConfig.java | 2 +-
 .../org/deeplearning4j/nn/api/Updater.java | 2 +-
 .../nn/api/layers/LayerConstraint.java | 2 +-
 .../nn/api/layers/RecurrentLayer.java | 6 +-
 .../nn/conf/NeuralNetConfiguration.java | 5 +-
 .../nn/conf/constraint/MaxNormConstraint.java | 4 +-
 .../conf/constraint/MinMaxNormConstraint.java | 6 +-
 .../conf/constraint/UnitNormConstraint.java | 4 +-
 .../nn/conf/graph/LayerVertex.java | 7 +-
 .../nn/conf/layers/ActivationLayer.java | 2 +-
 .../nn/conf/layers/BaseLayer.java | 4 +-
 .../nn/conf/layers/CapsuleLayer.java | 4 +-
 .../nn/conf/layers/DenseLayer.java | 4 +-
 .../deeplearning4j/nn/conf/layers/Layer.java | 4 +-
 .../nn/conf/layers/LayerValidation.java | 4 +-
 .../layers/LocalResponseNormalization.java | 2 +-
 .../nn/conf/layers/PrimaryCapsules.java | 2 +-
 .../misc/ElementWiseMultiplicationLayer.java | 2 +-
 .../layers/recurrent/TimeDistributed.java | 2 +-
 .../layers/samediff/SameDiffLambdaLayer.java | 2 +-
 .../layers/samediff/SameDiffLambdaVertex.java | 2 +-
 .../layers/wrapper/BuildingBlockLayer.java | 97 -
 .../nn/conf/memory/NetworkMemoryReport.java | 2 +-
 .../nn/conf/weightnoise/IWeightNoise.java | 2 +-
 .../nn/graph/ComputationGraph.java | 230 +-
 .../nn/graph/vertex/BaseGraphVertex.java | 4 +-
 .../nn/graph/vertex/GraphVertex.java | 4 +-
 .../nn/graph/vertex/impl/LayerVertex.java | 6 +-
 .../impl/rnn/DuplicateToTimeSeriesVertex.java | 4 +-
 .../vertex/impl/rnn/LastTimeStepVertex.java | 4 +-
 .../impl/rnn/ReverseTimeSeriesVertex.java | 4 +-
 .../nn/layers/recurrent/LSTMHelpers.java | 2 +-
 .../nn/multilayer/MultiLayerNetwork.java | 8061 +++++++++--------
 .../nn/transferlearning/TransferLearning.java | 2 +-
 .../TransferLearningHelper.java | 4 +-
 .../nn/updater/BaseMultiLayerUpdater.java | 4 +-
 .../optimize/api/TrainingListener.java | 4 +-
 .../listeners/CheckpointListener.java | 4 +-
 .../optimize/solvers/BaseOptimizer.java | 6 +-
 .../util/Convolution1DUtils.java | 2 +-
 .../util/CrashReportingUtil.java | 30 +-
 .../deeplearning4j/util/ModelSerializer.java | 2 +-
 .../org/deeplearning4j/util/NetworkUtils.java | 8 +-
 .../deeplearning4j/util/OutputLayerUtil.java | 2 +-
 .../deeplearning4j/util/TimeSeriesUtils.java | 2 +-
 .../java/net/brutex/ai/dnn/api/dnnTest.java | 127 +
 .../brutex/ai/dnn/conf/layer/FFLayerTest.java | 47 +
 .../nn/layers/HelperUtilsTest.java | 2 +-
 .../parallelism/InplaceParallelInference.java | 3 +-
 .../parallelism/ParallelInference.java | 2 +-
 .../parallelism/trainer/DefaultTrainer.java | 4 +-
 .../impl/graph/SparkComputationGraph.java | 2 +-
 ...VaeReconstructionErrorWithKeyFunction.java | 2 +-
 ...GVaeReconstructionProbWithKeyFunction.java | 2 +-
 ...VaeReconstructionErrorWithKeyFunction.java | 2 +-
 .../VaeReconstructionProbWithKeyFunction.java | 2 +-
 .../ParameterAveragingTrainingMaster.java | 4 +-
 .../spark/impl/misc/TestFrozenLayers.java | 4 +-
 ...TestSparkMultiLayerParameterAveraging.java | 10 +-
 .../pw/SharedTrainingWrapper.java | 6 +-
 .../training/SharedTrainingMaster.java | 2 +-
 .../ui/model/stats/BaseStatsListener.java | 5 +-
 .../ui/model/stats/impl/SbeStatsReport.java | 4 +-
 .../ui/module/train/TrainModuleUtils.java | 8 +-
 .../templates/TrainingModel.html.ftl | 6 +-
 .../org/deeplearning4j/zoo/TestUtils.java | 2 +-
 settings.gradle | 2 +-
 146 files changed, 6151 insertions(+), 5493 deletions(-)
 rename cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/{conf/layer/LayerConfiguration.java => api/ILayer.java} (60%)
 rename cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/{conf/layer/FFLayer.java => api/ILayerConfiguration.java} (56%)
 create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java
 rename cavis-dnn/cavis-dnn-nn/src/main/java/{org/deeplearning4j/nn/api/NeuralNetwork.java => net/brutex/ai/dnn/api/INeuralNetwork.java} (58%)
 create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java
 create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/DenseLayerConfiguration.java
 create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/FeedForwardLayerConfiguration.java
 delete mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/impl/network/AbstractNeuralNetwork.java
 delete mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/impl/network/NeuralNetwork.java
 create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java
 delete mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BuildingBlockLayer.java
 create mode 100644 cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java
 create mode 100644 cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/conf/layer/FFLayerTest.java
diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java index f5b47031b..fca68610a 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java @@ -21,8 +21,19 @@ package net.brutex.gan; -import java.util.List; +import java.awt.BorderLayout; +import java.awt.Dimension; +import java.awt.GridLayout; +import java.awt.Image; +import java.awt.image.BufferedImage; +import java.io.File; +import java.util.Arrays; import java.util.Random; +import javax.swing.ImageIcon; +import javax.swing.JFrame; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.WindowConstants; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ArrayUtils; import org.datavec.api.split.FileSplit; @@ -34,20 +45,23 @@ import org.datavec.image.transform.PipelineImageTransform; import org.datavec.image.transform.ResizeImageTransform; import org.datavec.image.transform.ShowImageTransform; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; -import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import net.brutex.ai.dnn.conf.NeuralNetworkConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.ActivationLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.DropoutLayer; +import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; import 
org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop; -import org.deeplearning4j.nn.conf.layers.wrapper.BuildingBlockLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.WeightInitXavier; import org.deeplearning4j.optimize.listeners.ScoreToChartListener; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.activations.impl.ActivationLReLU; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; @@ -55,13 +69,6 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Adam; import org.nd4j.linalg.learning.config.IUpdater; - - -import javax.swing.*; -import java.awt.*; -import java.awt.image.BufferedImage; -import java.io.File; -import java.util.Arrays; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; @Slf4j @@ -106,7 +113,7 @@ public class App { * @return config */ private static MultiLayerConfiguration generator() { - MultiLayerConfiguration confxx = new NeuralNetConfiguration.Builder() + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(42) .updater(UPDATER) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) @@ -117,23 +124,8 @@ public class App { .setInputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) // .inputPreProcessor("CNN1", new FeedForwardToCnnPreProcessor(Y_DIM, X_DIM, CHANNELS)) .build(); - log.debug("Generator network: \n{}", confxx.toJson()); - NeuralNetworkConfiguration conf2 = NeuralNetworkConfiguration.builder().build(); - - NeuralNetworkConfiguration confx = NeuralNetworkConfiguration.builder() - .cacheMode(CacheMode.HOST) - .layer( new DenseLayer.Builder().build()) - .layer( new DenseLayer.Builder().build()) - .layer( BuildingBlockLayer.builder().build()) - .layers( List.of(genLayers())) - .inputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) - .build(); - - - - - return confx; + return conf; } private static Layer[] disLayers() { @@ -155,6 +147,7 @@ public class App { } private static MultiLayerConfiguration discriminator() { + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(42) .updater(UPDATER) @@ -183,13 +176,13 @@ public class App { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(42) - .updater(UPDATER) - .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) - .gradientNormalizationThreshold(GRADIENT_THRESHOLD) - .weightInit(WeightInit.XAVIER) - .activation(Activation.IDENTITY) - .list(layers) - .setInputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) + .updater( Adam.builder().learningRate(0.0002).beta1(0.5).build() ) + .gradientNormalization( GradientNormalization.RenormalizeL2PerLayer) + .gradientNormalizationThreshold( 100 ) + .weightInit( new WeightInitXavier() ) + .activation( new ActivationIdentity()) + .list( layers ) + .setInputType( InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) .build(); return conf; diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java index efb54aa29..bc0aafa13 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java @@ -295,7 +295,7 @@ public class BrianTest extends 
BaseSparkSessionTest { .activation(Activation.RELU).l2(0.001).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(20).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - //.layer(2, new DenseLayer.Builder().nIn(9).nOut(9).weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) + //.layer(2, new DenseLayerConfiguration.Builder().nIn(9).nOut(9).weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.XENT).nIn(20).nOut(4) .weightInit(WeightInit.XAVIER).activation(Activation.SIGMOID).build()) .build(); diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java index 4e340c69a..f32c3c4de 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java @@ -301,7 +301,7 @@ public class BrianTest2 /*extends BaseDL4JTest*/ { .list() .layer(0, new DenseLayer.Builder().nIn(5).nOut(20).weightInit(WeightInit.XAVIER).activation(Activation.RELU).l2(0.001).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(20).weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) - //.layer(2, new DenseLayer.Builder().nIn(9).nOut(9).weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) + //.layer(2, new DenseLayerConfiguration.Builder().nIn(9).nOut(9).weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.XENT).nIn(20).nOut(4).weightInit(WeightInit.XAVIER).activation(Activation.SIGMOID).build()) .build(); diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java index 353195da4..b81f70fc8 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java @@ -95,7 +95,7 @@ public class TestServer { .list() //.layer(0, new ConvolutionLayer.Builder().nIn(1).kernelSize(1, 5).stride(1,1).padding(0,2).nOut(1).name("1st Filter").updater(new Adam.Builder().learningRate(0.2).build()).build()) //.layer(1, new ConvolutionLayer.Builder().nIn(1).kernelSize(1, 2).stride(1,2).padding(0,0).nOut(1).name("2nd Filter").updater(new Adam.Builder().learningRate(0.1).build()).build()) - // .layer(1, new DenseLayer.Builder().nIn(10).nOut(64).activation(Activation.RELU).build()) + // .layer(1, new DenseLayerConfiguration.Builder().nIn(10).nOut(64).activation(Activation.RELU).build()) .layer(0, new DenseLayer.Builder().nIn(10).nOut(100).activation(Activation.RELU).l2(0.003).build()) .layer(1, new LSTM.Builder().nIn(100).nOut(100).activation(Activation.TANH).build()) .layer(2, new LSTM.Builder().nIn(100).nOut(100).activation(Activation.TANH).build()) diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java index d6ac22e11..ac625f2b6 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java @@ -131,7 +131,7 @@ public class TestServer2 { .list() //.layer(0, new ConvolutionLayer.Builder().nIn(1).kernelSize(1, 5).stride(1,1).padding(0,2).nOut(1).name("1st Filter").updater(new Adam.Builder().learningRate(0.2).build()).build()) //.layer(1, new 
ConvolutionLayer.Builder().nIn(1).kernelSize(1, 2).stride(1,2).padding(0,0).nOut(1).name("2nd Filter").updater(new Adam.Builder().learningRate(0.1).build()).build()) - // .layer(1, new DenseLayer.Builder().nIn(10).nOut(64).activation(Activation.RELU).build()) + // .layer(1, new DenseLayerConfiguration.Builder().nIn(10).nOut(64).activation(Activation.RELU).build()) .layer(0, new DenseLayer.Builder().nIn(10).nOut(100).activation(Activation.RELU).l2(0.003).build()) .layer(1, new LSTM.Builder().nIn(100).nOut(100).activation(Activation.TANH).build()) .layer(2, new LSTM.Builder().nIn(100).nOut(100).activation(Activation.TANH).build()) diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java index 7c4bcc9ac..8111d2b7d 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java @@ -284,7 +284,7 @@ public class IntegrationTestBaselineGenerator { INDArray paramsPostTraining; if (modelType == ModelType.MLN) { int[] layersToTrain = tc.getUnsupervisedTrainLayersMLN(); - Preconditions.checkState(layersToTrain != null, "Layer indices must not be null"); + Preconditions.checkState(layersToTrain != null, "ILayer indices must not be null"); DataSetIterator dsi = new MultiDataSetWrapperIterator(iter); for (int i : layersToTrain) { @@ -293,7 +293,7 @@ public class IntegrationTestBaselineGenerator { paramsPostTraining = mln.params(); } else if (modelType == ModelType.CG) { String[] layersToTrain = tc.getUnsupervisedTrainLayersCG(); - Preconditions.checkState(layersToTrain != null, "Layer names must not be null"); + Preconditions.checkState(layersToTrain != null, "ILayer names must not be null"); for (String i : layersToTrain) { cg.pretrainLayer(i, iter); diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java index fbc0d60a3..489c8021d 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java @@ -200,7 +200,7 @@ public class IntegrationTestRunner { m = cg; ComputationGraph loaded = ComputationGraph.load(savedModel, true); - assertEquals(loaded.getConfiguration(), cg.getConfiguration(), "Configs not equal" ); + assertEquals(loaded.getComputationGraphConfiguration(), cg.getComputationGraphConfiguration(), "Configs not equal" ); assertEquals( loaded.params(), cg.params(), "Params not equal"); assertEquals(loaded.paramTable(), cg.paramTable(), "Param table not equal"); } else if(config instanceof SameDiff){ @@ -383,7 +383,7 @@ public class IntegrationTestRunner { org.deeplearning4j.nn.api.Layer[] layers; if(modelType == ModelType.MLN){ int[] layersToTrain = tc.getUnsupervisedTrainLayersMLN(); - Preconditions.checkState(layersToTrain != null, "Layer indices must not be null"); + Preconditions.checkState(layersToTrain != null, "ILayer indices must not be null"); DataSetIterator dsi = new MultiDataSetWrapperIterator(iter); for( int i : layersToTrain){ @@ -393,7 +393,7 @@ public class IntegrationTestRunner { layers = mln.getLayers(); } else if(modelType == ModelType.CG) { String[] layersToTrain = 
tc.getUnsupervisedTrainLayersCG(); - Preconditions.checkState(layersToTrain != null, "Layer names must not be null"); + Preconditions.checkState(layersToTrain != null, "ILayer names must not be null"); for( String i : layersToTrain){ cg.pretrainLayer(i, iter); @@ -429,8 +429,8 @@ public class IntegrationTestRunner { isTbptt = mln.getLayerWiseConfigurations().getBackpropType() == BackpropType.TruncatedBPTT; tbpttLength = mln.getLayerWiseConfigurations().getTbpttFwdLength(); } else if(modelType == ModelType.CG) { - isTbptt = cg.getConfiguration().getBackpropType() == BackpropType.TruncatedBPTT; - tbpttLength = cg.getConfiguration().getTbpttFwdLength(); + isTbptt = cg.getComputationGraphConfiguration().getBackpropType() == BackpropType.TruncatedBPTT; + tbpttLength = cg.getComputationGraphConfiguration().getTbpttFwdLength(); } else { isTbptt = false; tbpttLength = 0; @@ -458,11 +458,11 @@ public class IntegrationTestRunner { epochAfter = mln.getEpochCount(); layers = mln.getLayers(); } else if(modelType == ModelType.CG){ - iterBefore = cg.getConfiguration().getIterationCount(); - epochBefore = cg.getConfiguration().getEpochCount(); + iterBefore = cg.getComputationGraphConfiguration().getIterationCount(); + epochBefore = cg.getComputationGraphConfiguration().getEpochCount(); cg.fit(countingIter); - iterAfter = cg.getConfiguration().getIterationCount(); - epochAfter = cg.getConfiguration().getEpochCount(); + iterAfter = cg.getComputationGraphConfiguration().getIterationCount(); + epochAfter = cg.getComputationGraphConfiguration().getEpochCount(); layers = cg.getLayers(); } else { iterBefore = sd.getTrainingConfig().getIterationCount(); @@ -611,7 +611,7 @@ public class IntegrationTestRunner { } else if(modelType == ModelType.CG){ ModelSerializer.writeModel(m, f, true); ComputationGraph restored = ComputationGraph.load(f, true); - assertEquals(cg.getConfiguration(), restored.getConfiguration()); + assertEquals(cg.getComputationGraphConfiguration(), restored.getComputationGraphConfiguration()); assertEquals(cg.params(), restored.params()); } else { sd.save(f, true); @@ -745,7 +745,7 @@ public class IntegrationTestRunner { preProcessors = mln.getLayerWiseConfigurations().getInputPreProcessors().values(); } else { preProcessors = new ArrayList<>(); - for (org.deeplearning4j.nn.conf.graph.GraphVertex gv : cg.getConfiguration().getVertices().values()) { + for (org.deeplearning4j.nn.conf.graph.GraphVertex gv : cg.getComputationGraphConfiguration().getVertices().values()) { if (gv instanceof LayerVertex) { InputPreProcessor pp = ((LayerVertex) gv).getPreProcessor(); if (pp != null) { @@ -760,7 +760,7 @@ public class IntegrationTestRunner { //Collect vertex coverage information if (!isMLN) { - for (org.deeplearning4j.nn.conf.graph.GraphVertex gv : cg.getConfiguration().getVertices().values()) { + for (org.deeplearning4j.nn.conf.graph.GraphVertex gv : cg.getComputationGraphConfiguration().getVertices().values()) { vertexConfClassesSeen.put(gv.getClass(), vertexConfClassesSeen.getOrDefault(gv.getClass(), 0) + 1); } } @@ -872,14 +872,14 @@ public class IntegrationTestRunner { log.info("||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"); - log.info("Layer coverage - classes seen:"); + log.info("ILayer coverage - classes seen:"); for (Class c : layerClasses) { if (layerConfClassesSeen.containsKey(c)) { log.info("Class seen {} times in tests: {}", layerConfClassesSeen.get(c), c.getName()); } } - log.info("Layer classes NOT seen in any tests:"); + 
log.info("ILayer classes NOT seen in any tests:"); for (Class c : layerClasses) { if (!layerConfClassesSeen.containsKey(c)) { log.info("Class NOT seen in any tests: {}", c.getName()); diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java index 5c16cc908..e03f2a523 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java @@ -73,7 +73,7 @@ public class TestUtils { ByteArrayInputStream bais = new ByteArrayInputStream(bytes); restored = ModelSerializer.restoreComputationGraph(bais, true); - assertEquals(net.getConfiguration(), restored.getConfiguration()); + assertEquals(net.getComputationGraphConfiguration(), restored.getComputationGraphConfiguration()); assertEquals(net.params(), restored.params()); } catch (IOException e){ //Should never happen @@ -81,7 +81,7 @@ public class TestUtils { } //Also check the ComputationGraphConfiguration is serializable (required by Spark etc) - ComputationGraphConfiguration conf = net.getConfiguration(); + ComputationGraphConfiguration conf = net.getComputationGraphConfiguration(); serializeDeserializeJava(conf); return restored; diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java index f1e12d123..cecc969ac 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java @@ -90,7 +90,7 @@ public class TestUtils { ByteArrayInputStream bais = new ByteArrayInputStream(bytes); restored = ModelSerializer.restoreComputationGraph(bais, true); - assertEquals(net.getConfiguration(), restored.getConfiguration()); + assertEquals(net.getComputationGraphConfiguration(), restored.getComputationGraphConfiguration()); assertEquals(net.params(), restored.params()); } catch (IOException e){ //Should never happen @@ -98,7 +98,7 @@ public class TestUtils { } //Also check the ComputationGraphConfiguration is serializable (required by Spark etc) - ComputationGraphConfiguration conf = net.getConfiguration(); + ComputationGraphConfiguration conf = net.getComputationGraphConfiguration(); serializeDeserializeJava(conf); return restored; diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java index 30cb1e5ca..7b44d26c9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java @@ -626,7 +626,7 @@ public class EvalTest extends BaseDL4JTest { net.evaluate(iter); net.evaluateROCMultiClass(iter, 0); - cg.getConfiguration().setValidateOutputLayerConfig(false); + cg.getComputationGraphConfiguration().setValidateOutputLayerConfig(false); cg.evaluate(iter); cg.evaluateROCMultiClass(iter, 0); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java index 65f8787d8..f45861f57 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java @@ -90,7 +90,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { mln.init(); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc //i.e., runningMean = decay * runningMean + (1-decay) * batchMean @@ -135,7 +135,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { mln.init(); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc //i.e., runningMean = decay * runningMean + (1-decay) * batchMean @@ -237,7 +237,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]); // for (int k = 0; k < mln.getnLayers(); k++) -// System.out.println("Layer " + k + " # params: " + mln.getLayer(k).numParams()); +// System.out.println("ILayer " + k + " # params: " + mln.getLayer(k).numParams()); //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc //i.e., runningMean = decay * runningMean + (1-decay) * batchMean @@ -341,7 +341,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]); // for (int k = 0; k < mln.getnLayers(); k++) -// System.out.println("Layer " + k + " # params: " + mln.getLayer(k).numParams()); +// System.out.println("ILayer " + k + " # params: " + mln.getLayer(k).numParams()); //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc //i.e., runningMean = decay * runningMean + (1-decay) * batchMean @@ -385,7 +385,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { mln.init(); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc //i.e., runningMean = decay * runningMean + (1-decay) * batchMean @@ -430,7 +430,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { mln.init(); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc //i.e., runningMean = decay * runningMean + (1-decay) * batchMean @@ -572,7 +572,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]); // for (int k = 0; k < net.getNumLayers(); 
k++) -// System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams()); +// System.out.println("ILayer " + k + " # params: " + net.getLayer(k).numParams()); //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc //i.e., runningMean = decay * runningMean + (1-decay) * batchMean diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java index b61c1fe24..b9f461775 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java @@ -118,7 +118,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -198,7 +198,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -282,7 +282,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -359,7 +359,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java index 4d3de0bfb..1f4a1ceec 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java @@ -149,7 +149,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { log.info(msg); // for (int j = 0; j < net.getnLayers(); j++) { -// log.info("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// log.info("ILayer " + j + " # params: " + net.getLayer(j).numParams()); // } } @@ -252,7 +252,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { log.info(msg); // for (int j = 0; j < net.getnLayers(); j++) { -// log.info("Layer " + j + " # params: " + 
net.getLayer(j).numParams()); +// log.info("ILayer " + j + " # params: " + net.getLayer(j).numParams()); // } } @@ -431,7 +431,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { log.info(msg); // for (int j = 0; j < net.getnLayers(); j++) { -// log.info("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// log.info("ILayer " + j + " # params: " + net.getLayer(j).numParams()); // } } @@ -530,7 +530,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { log.info(msg); // for (int j = 0; j < net.getnLayers(); j++) { -// log.info("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// log.info("ILayer " + j + " # params: " + net.getLayer(j).numParams()); // } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java index b9536ee41..b737fcf79 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java @@ -137,7 +137,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -231,7 +231,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -293,7 +293,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); @@ -361,7 +361,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); @@ -427,7 +427,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + 
net.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -500,7 +500,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -920,7 +920,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java index 7cb10f83b..36574096d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java @@ -95,7 +95,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest { System.out.println("testLSTMGlobalPoolingBasicMultiLayer() - " + pt + ", minibatch = " + miniBatchSize); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -156,7 +156,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testCnnGlobalPoolingBasicMultiLayer() - " + pt + ", minibatch = " + miniBatchSize + " - " + (nchw ? 
"NCHW" : "NHWC")); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -216,7 +216,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testLSTMGlobalPoolingBasicMultiLayer() - " + pt + ", minibatch = " + miniBatchSize); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input) @@ -299,7 +299,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest { System.out.println("testCnnGlobalPoolingBasicMultiLayer() - " + pt + ", minibatch = " + miniBatchSize); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java index cab80a69a..553477bd5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java @@ -123,7 +123,7 @@ public class GradientCheckTests extends BaseDL4JTest { + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -203,7 +203,7 @@ public class GradientCheckTests extends BaseDL4JTest { + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -297,7 +297,7 @@ public class GradientCheckTests extends BaseDL4JTest { + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -342,7 +342,7 @@ public class GradientCheckTests extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testEmbeddingLayerSimple"); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + 
mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -382,7 +382,7 @@ public class GradientCheckTests extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testEmbeddingLayerSimple"); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -472,7 +472,7 @@ public class GradientCheckTests extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -714,7 +714,7 @@ public class GradientCheckTests extends BaseDL4JTest { // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) - // (d) Layer Normalization enabled / disabled + // (d) ILayer Normalization enabled / disabled Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {true, false}; //If true: run some backprop steps first @@ -776,7 +776,7 @@ public class GradientCheckTests extends BaseDL4JTest { + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", layerNorm=" + layerNorm); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java index ec99f3852..7718078a6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java @@ -106,7 +106,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testBasicIris()"); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input}) @@ -157,7 +157,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testBasicIrisWithMerging()"); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = 
GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input}) @@ -214,7 +214,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testBasicIrisWithElementWiseVertex(op=" + op + ")"); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input}) @@ -274,7 +274,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testBasicIrisWithElementWiseVertex(op=" + op + ")"); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input}) @@ -376,7 +376,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input}) @@ -439,7 +439,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input}) @@ -478,7 +478,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testLSTMWithSubset()"); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input}) @@ -515,7 +515,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testLSTMWithLastTimeStepVertex()"); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } //First: test with no input mask array @@ -579,7 +579,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testLSTMWithDuplicateToTimeSeries()"); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new 
GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input1, input2}) @@ -628,7 +628,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testLSTMWithReverseTimeSeriesVertex()"); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input}) @@ -683,7 +683,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(inputs) @@ -723,7 +723,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input}) @@ -769,7 +769,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(input) @@ -820,7 +820,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input}) @@ -888,7 +888,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testBasicIrisTripletStackingL2Loss()"); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{pos, anc, neg}) @@ -949,7 +949,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{example}) 
@@ -1014,7 +1014,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + net.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -1063,7 +1063,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{in1, in2}) @@ -1121,7 +1121,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{in1, in2}) @@ -1179,7 +1179,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{in1, in2}) @@ -1242,7 +1242,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } graph.setLayerMaskArrays(new INDArray[] {inMask1, inMask2}, null); @@ -1301,7 +1301,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{in1, in2}) @@ -1347,7 +1347,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{in1}) @@ -1398,7 +1398,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < 
graph.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + graph.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{in1}) @@ -1436,7 +1436,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println("testGraphEmbeddingLayerSimple"); // for (int j = 0; j < cg.getNumLayers(); j++) -// System.out.println("Layer " + j + " # params: " + cg.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + cg.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(cg).inputs(new INDArray[]{input}) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java index 9d982818a..87ea20cf5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java @@ -84,7 +84,7 @@ public class LRNGradientCheckTests extends BaseDL4JTest { // if (PRINT_RESULTS) { // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); // } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java index c1e20d858..a2c7d7039 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java @@ -126,7 +126,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -215,7 +215,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input) @@ -343,7 +343,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java index 5cfec0631..477199be0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java @@ -78,7 +78,7 @@ public class NoBiasGradientCheckTests extends BaseDL4JTest { .dist(new NormalDistribution(0, 1)) .activation(Activation.TANH) - .hasBias(true) //Layer 0: Always have a bias + .hasBias(true) //ILayer 0: Always have a bias .build()) .layer(1, new DenseLayer.Builder().nIn(layerSize).nOut(layerSize) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java index 1c1da4cee..0928b52de 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java @@ -137,7 +137,7 @@ public class OutputLayerGradientChecks extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } System.out.println("Starting test: " + testName); @@ -244,7 +244,7 @@ public class OutputLayerGradientChecks extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } System.out.println("Starting test: " + testName); @@ -393,7 +393,7 @@ public class OutputLayerGradientChecks extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } System.out.println("Starting test: " + testName); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java index 92ddf8622..40041885e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java @@ -124,7 +124,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, @@ -195,7 +195,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int l = 0; l < mln.getnLayers(); l++) -// System.out.println("Layer " + l + " # params: 
" + mln.getLayer(l).numParams()); +// System.out.println("ILayer " + l + " # params: " + mln.getLayer(l).numParams()); } boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS, @@ -283,7 +283,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS, @@ -325,7 +325,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest { if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); +// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS, diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java index 60b549714..be25a0ccd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java @@ -133,8 +133,8 @@ public class LayerConfigTest extends BaseDL4JTest { //Learning rate without layerwise override: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) + .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -143,8 +143,8 @@ public class LayerConfigTest extends BaseDL4JTest { //With: conf = new NeuralNetConfiguration.Builder().learningRate(0.3).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).learningRate(0.2).build()).build(); + .layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) + .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).learningRate(0.2).build()).build(); net = new MultiLayerNetwork(conf); net.init(); @@ -154,8 +154,8 @@ public class LayerConfigTest extends BaseDL4JTest { //L1 and L2 without layerwise override: conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.2).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) + .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); net = new MultiLayerNetwork(conf); net.init(); @@ -166,8 +166,8 @@ public class LayerConfigTest extends BaseDL4JTest { //L1 and L2 with layerwise override: conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.2).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l1(0.9).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.8).build()).build(); + .layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).l1(0.9).build()) + .layer(1, new 
DenseLayerConfiguration.Builder().nIn(2).nOut(2).l2(0.8).build()).build(); net = new MultiLayerNetwork(conf); net.init(); @@ -326,8 +326,8 @@ public class LayerConfigTest extends BaseDL4JTest { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr) .updater(Updater.SGD) .learningRateDecayPolicy(LearningRatePolicy.Exponential).lrPolicyDecayRate(lrDecayRate).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) + .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -345,8 +345,8 @@ public class LayerConfigTest extends BaseDL4JTest { int iterations = 1; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) .learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(lrDecayRate) - .lrPolicyPower(power).list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .lrPolicyPower(power).list().layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) + .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -367,8 +367,8 @@ public class LayerConfigTest extends BaseDL4JTest { int iterations = 1; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) .learningRateDecayPolicy(LearningRatePolicy.Step).lrPolicyDecayRate(lrDecayRate) - .lrPolicySteps(steps).list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .lrPolicySteps(steps).list().layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) + .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -388,8 +388,8 @@ public class LayerConfigTest extends BaseDL4JTest { int iterations = 1; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) .learningRateDecayPolicy(LearningRatePolicy.Poly).lrPolicyDecayRate(lrDecayRate) - .lrPolicyPower(power).list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .lrPolicyPower(power).list().layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) + .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -409,8 +409,8 @@ public class LayerConfigTest extends BaseDL4JTest { int iterations = 1; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) .learningRateDecayPolicy(LearningRatePolicy.Sigmoid).lrPolicyDecayRate(lrDecayRate) - .lrPolicySteps(steps).list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .lrPolicySteps(steps).list().layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) + .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java index b3e625849..edad9fb7d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java @@ -229,7 +229,7 @@ public class DTypeTests extends BaseDL4JTest { if (seenLayers.size() < layerClasses.size()) { for (Class c : layerClasses) { if (!seenLayers.contains(c) && !ignoreClasses.contains(c)) { - log.warn("Layer class not tested for global vs. network datatypes: {}", c); + log.warn("ILayer class not tested for global vs. network datatypes: {}", c); fail = true; } } @@ -279,7 +279,7 @@ public class DTypeTests extends BaseDL4JTest { } public static void logUsedClasses(ComputationGraph net) { - ComputationGraphConfiguration conf = net.getConfiguration(); + ComputationGraphConfiguration conf = net.getComputationGraphConfiguration(); for (GraphVertex gv : conf.getVertices().values()) { seenVertices.add(gv.getClass()); if (gv instanceof LayerVertex) { diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java index eb8c1cbcc..2d2379fdb 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java @@ -65,7 +65,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); int timeSeriesLength = 12; - //4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer. Hence also tests preprocessors. + //4 layer network: 2 GravesLSTM + DenseLayerConfiguration + RnnOutputLayer. Hence also tests preprocessors. ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder() .addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7) @@ -208,7 +208,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); int timeSeriesLength = 12; - //4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer. Hence also tests preprocessors. + //4 layer network: 2 GravesLSTM + DenseLayerConfiguration + RnnOutputLayer. Hence also tests preprocessors. 
//Network architecture: lstm0 -> Dense -> RnnOutputLayer0 // and lstm1 -> Dense -> RnnOutputLayer1 ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder() @@ -391,9 +391,9 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { graphTBPTT.init(); graphTBPTT.clearTbpttState = false; - assertEquals(BackpropType.TruncatedBPTT, graphTBPTT.getConfiguration().getBackpropType()); - assertEquals(timeSeriesLength, graphTBPTT.getConfiguration().getTbpttFwdLength()); - assertEquals(timeSeriesLength, graphTBPTT.getConfiguration().getTbpttBackLength()); + assertEquals(BackpropType.TruncatedBPTT, graphTBPTT.getComputationGraphConfiguration().getBackpropType()); + assertEquals(timeSeriesLength, graphTBPTT.getComputationGraphConfiguration().getTbpttFwdLength()); + assertEquals(timeSeriesLength, graphTBPTT.getComputationGraphConfiguration().getTbpttBackLength()); INDArray inputData = Nd4j.rand(miniBatchSize, nIn, timeSeriesLength); INDArray labels = Nd4j.rand(miniBatchSize, nOut, timeSeriesLength); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java index a17979bf2..794538c36 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java @@ -42,7 +42,6 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.conditions.Conditions; import org.nd4j.linalg.learning.config.Adam; -import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -168,8 +167,8 @@ public class TestCompGraphUnsupervised extends BaseDL4JTest { net.init(); ComputationGraph cg = net.toComputationGraph(); - cg.getConfiguration().setInferenceWorkspaceMode(wsm); - cg.getConfiguration().setTrainingWorkspaceMode(wsm); + cg.getComputationGraphConfiguration().setInferenceWorkspaceMode(wsm); + cg.getComputationGraphConfiguration().setTrainingWorkspaceMode(wsm); DataSetIterator ds = new EarlyTerminationDataSetIterator(new MnistDataSetIterator(1, true, 12345), 1); Nd4j.getRandom().setSeed(12345); net.pretrainLayer(0, ds); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java index 7a918a674..a6373c6a9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java @@ -1033,15 +1033,15 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { DataSetIterator iter = new IrisDataSetIterator(50, 150); - assertEquals(0, network.getConfiguration().getIterationCount()); + assertEquals(0, network.getComputationGraphConfiguration().getIterationCount()); network.fit(iter); - assertEquals(3, network.getConfiguration().getIterationCount()); + assertEquals(3, network.getComputationGraphConfiguration().getIterationCount()); iter.reset(); network.fit(iter); - assertEquals(6, network.getConfiguration().getIterationCount()); + assertEquals(6, network.getComputationGraphConfiguration().getIterationCount()); iter.reset(); network.fit(iter.next()); - assertEquals(7, network.getConfiguration().getIterationCount()); + assertEquals(7, 
network.getComputationGraphConfiguration().getIterationCount()); ByteArrayOutputStream baos = new ByteArrayOutputStream(); ModelSerializer.writeModel(network, baos, true); @@ -1049,7 +1049,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ByteArrayInputStream bais = new ByteArrayInputStream(asBytes); ComputationGraph net = ModelSerializer.restoreComputationGraph(bais, true); - assertEquals(7, net.getConfiguration().getIterationCount()); + assertEquals(7, net.getComputationGraphConfiguration().getIterationCount()); } @Test @@ -1272,18 +1272,18 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraph net = new ComputationGraph(conf); net.init(); - assertEquals(0, net.getConfiguration().getEpochCount()); + assertEquals(0, net.getComputationGraphConfiguration().getEpochCount()); DataSetIterator iter = new IrisDataSetIterator(150, 150); for( int i=0; i<4; i++ ){ - assertEquals(i, net.getConfiguration().getEpochCount()); + assertEquals(i, net.getComputationGraphConfiguration().getEpochCount()); net.fit(iter); - assertEquals(i+1, net.getConfiguration().getEpochCount()); + assertEquals(i+1, net.getComputationGraphConfiguration().getEpochCount()); } - assertEquals(4, net.getConfiguration().getEpochCount()); + assertEquals(4, net.getComputationGraphConfiguration().getEpochCount()); ByteArrayOutputStream baos = new ByteArrayOutputStream(); @@ -1293,7 +1293,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ByteArrayInputStream bais = new ByteArrayInputStream(bytes); ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais, true); - assertEquals(4, restored.getConfiguration().getEpochCount()); + assertEquals(4, restored.getComputationGraphConfiguration().getEpochCount()); } @Test @@ -1619,13 +1619,13 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { GraphIndices indices = cg.calculateIndices(); int[] order = cg.topologicalSortOrder(); - List strOrder = cg.getConfiguration().getTopologicalOrderStr(); + List strOrder = cg.getComputationGraphConfiguration().getTopologicalOrderStr(); INDArray[] out1 = cg.output(in); //Check it's the same after loading: ComputationGraph cg2 = TestUtils.testModelSerialization(cg); int[] order2 = cg2.topologicalSortOrder(); - List strOrder2 = cg.getConfiguration().getTopologicalOrderStr(); + List strOrder2 = cg.getComputationGraphConfiguration().getTopologicalOrderStr(); assertArrayEquals(order, order2); assertEquals(strOrder, strOrder2); @@ -1633,7 +1633,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { assertArrayEquals(out1, out2); //Delete the topological order, ensure it gets recreated properly: - ComputationGraphConfiguration conf3 = cg2.getConfiguration().clone(); + ComputationGraphConfiguration conf3 = cg2.getComputationGraphConfiguration().clone(); conf3.setTopologicalOrder(null); conf3.setTopologicalOrderStr(null); ComputationGraph cg3 = new ComputationGraph(conf3); @@ -1641,7 +1641,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { cg3.setParams(cg2.params()); int[] order3 = cg3.topologicalSortOrder(); - List strOrder3 = cg.getConfiguration().getTopologicalOrderStr(); + List strOrder3 = cg.getComputationGraphConfiguration().getTopologicalOrderStr(); INDArray[] out3 = cg3.output(in); assertArrayEquals(order, order3); assertEquals(strOrder, strOrder3); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java index c3543e167..0f506dbfe 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java @@ -235,7 +235,7 @@ public class FrozenLayerTest extends BaseDL4JTest { ComputationGraph clonedModel = modelNow.clone(); //Check json - assertEquals(clonedModel.getConfiguration().toJson(), modelNow.getConfiguration().toJson()); + assertEquals(clonedModel.getComputationGraphConfiguration().toJson(), modelNow.getComputationGraphConfiguration().toJson()); //Check params assertEquals(modelNow.params(), clonedModel.params()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java index 67f66fb21..868f34ba7 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java @@ -50,7 +50,7 @@ public class TestDropout extends BaseDL4JTest { @Test public void testDropoutSimple() throws Exception { //Testing dropout with a single layer - //Layer input: values should be set to either 0.0 or 2.0x original value + //ILayer input: values should be set to either 0.0 or 2.0x original value int nIn = 8; int nOut = 8; diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java index 259a38382..55c26b12b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java @@ -200,7 +200,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { @Test public void testEmbeddingForwardPass() { //With the same parameters, embedding layer should have same activations as the equivalent one-hot representation - // input with a DenseLayer + // input with a DenseLayerConfiguration int nClassesIn = 10; @@ -243,7 +243,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { @Test public void testEmbeddingBackwardPass() { //With the same parameters, embedding layer should have same activations as the equivalent one-hot representation - // input with a DenseLayer + // input with a DenseLayerConfiguration int nClassesIn = 10; diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java index e9f76dfc2..0eaa156f1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java @@ -104,7 +104,7 @@ public class OCNNOutputLayerTest extends BaseDL4JTest { + "ocnn" + "sigmoid" + ", doLearningFirst=" + doLearningFirst); for (int j = 0; j < network.getnLayers(); j++) - System.out.println("Layer " + j + " # params: " + network.getLayer(j).numParams()); + System.out.println("ILayer " + j + " # params: " + network.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(network, DEFAULT_EPS, 
DEFAULT_MAX_REL_ERROR, diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java index e84390916..3595282c0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java @@ -98,7 +98,7 @@ public class SameDiffDense extends SameDiffLayer { if(DefaultParamInitializer.BIAS_KEY.equals(e.getKey())){ e.getValue().assign(0.0); } else { - //Normally use 'c' order, but use 'f' for direct comparison to DL4J DenseLayer + //Normally use 'c' order, but use 'f' for direct comparison to DL4J DenseLayerConfiguration WeightInitUtil.initWeights(nIn, nOut, new long[]{nIn, nOut}, weightInit, null, 'f', e.getValue()); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDenseVertex.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDenseVertex.java index da674ea7c..baa4cee7e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDenseVertex.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDenseVertex.java @@ -72,14 +72,14 @@ public class SameDiffDenseVertex extends SameDiffVertex { @Override public void initializeParameters(Map params) { - //Normally use 'c' order, but use 'f' for direct comparison to DL4J DenseLayer + //Normally use 'c' order, but use 'f' for direct comparison to DL4J DenseLayerConfiguration WeightInitUtil.initWeights(nIn, nOut, new long[]{nIn, nOut}, weightInit, null, 'f', params.get("W")); params.get("b").assign(0.0); } @Override public char paramReshapeOrder(String paramName){ - return 'f'; //To match DL4J DenseLayer - for easy comparison + return 'f'; //To match DL4J DenseLayerConfiguration - for easy comparison } @Override diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java index cf7d31bd5..5b00685af 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java @@ -73,8 +73,8 @@ public class WorkspaceTests extends BaseDL4JTest { ComputationGraph c = createNet(); for (WorkspaceMode wm : new WorkspaceMode[]{WorkspaceMode.NONE, WorkspaceMode.ENABLED}) { log.info("Starting test: {}", wm); - c.getConfiguration().setTrainingWorkspaceMode(wm); - c.getConfiguration().setInferenceWorkspaceMode(wm); + c.getComputationGraphConfiguration().setTrainingWorkspaceMode(wm); + c.getComputationGraphConfiguration().setInferenceWorkspaceMode(wm); INDArray f = Nd4j.rand(8, 1, 28, 28); INDArray l = Nd4j.rand(8, 10); @@ -666,8 +666,8 @@ public class WorkspaceTests extends BaseDL4JTest { ComputationGraph c = createNet(); for (WorkspaceMode wm : new WorkspaceMode[]{WorkspaceMode.NONE, WorkspaceMode.ENABLED}) { log.info("Starting test: {}", wm); - c.getConfiguration().setTrainingWorkspaceMode(wm); - c.getConfiguration().setInferenceWorkspaceMode(wm); + c.getComputationGraphConfiguration().setTrainingWorkspaceMode(wm); + c.getComputationGraphConfiguration().setInferenceWorkspaceMode(wm); INDArray f = 
Nd4j.rand(8, 1, 28, 28); INDArray l = Nd4j.rand(8, 10); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java index 056f4a43e..49d70647c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java @@ -995,7 +995,7 @@ public class MultiLayerTest extends BaseDL4JTest { @Test public void testCompareLayerMethods(){ - //Simple test: compare .layer(int, Layer) and .layer(Layer) are identical + //Simple test: compare .layer(int, ILayer) and .layer(ILayer) are identical MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(123).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java index 5064e44ab..a12bd88f9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java @@ -261,7 +261,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); int timeSeriesLength = 12; - //4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer. Hence also tests preprocessors. + //4 layer network: 2 GravesLSTM + DenseLayerConfiguration + RnnOutputLayer. Hence also tests preprocessors. MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list() .layer(0, l0) .layer(1, l1) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java index 410abf970..92b8375dd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java @@ -216,8 +216,8 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { net2GradUpd.getUpdater().getStateViewArray()); //Remove the next 2 lines: fails - as net 1 is 1 iteration ahead - net1GradCalc.getConfiguration().setIterationCount(0); - net2GradUpd.getConfiguration().setIterationCount(0); + net1GradCalc.getComputationGraphConfiguration().setIterationCount(0); + net2GradUpd.getComputationGraphConfiguration().setIterationCount(0); for (int i = 0; i < 100; i++) { diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java index ad92a7c47..44c3bcb07 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java @@ -120,7 +120,7 @@ public class TestTransferLearningModelSerializer extends BaseDL4JTest { assertTrue(withFrozen.getLayer(0) instanceof FrozenLayer); assertTrue(withFrozen.getLayer(1) instanceof FrozenLayer); - Map m = 
withFrozen.getConfiguration().getVertices(); + Map m = withFrozen.getComputationGraphConfiguration().getVertices(); Layer l0 = ((LayerVertex) m.get("0")).getLayerConf().getLayer(); Layer l1 = ((LayerVertex) m.get("1")).getLayerConf().getLayer(); assertTrue(l0 instanceof org.deeplearning4j.nn.conf.layers.misc.FrozenLayer); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java index a81d96838..efc821b6e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java @@ -102,7 +102,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .build(); //Check json - assertEquals(expectedConf.toJson(), modelNow.getConfiguration().toJson()); + assertEquals(expectedConf.toJson(), modelNow.getComputationGraphConfiguration().toJson()); //Check params after fit modelNow.fit(randomData); @@ -382,7 +382,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { modelExpectedArch.getVertex("layer0").setLayerAsFrozen(); modelExpectedArch.getVertex("layer1").setLayerAsFrozen(); - assertEquals(modelExpectedArch.getConfiguration().toJson(), modelNow.getConfiguration().toJson()); + assertEquals(modelExpectedArch.getComputationGraphConfiguration().toJson(), modelNow.getComputationGraphConfiguration().toJson()); modelNow.setParams(modelExpectedArch.params()); int i = 0; @@ -445,7 +445,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { // assertEquals(confExpected, graph.getConfiguration()); - assertEquals(confExpected.toJson(), graph.getConfiguration().toJson()); + assertEquals(confExpected.toJson(), graph.getComputationGraphConfiguration().toJson()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java index 0e78a3d6c..d7e58be43 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java @@ -126,7 +126,7 @@ public class TransferLearningHelperTest extends BaseDL4JTest { .setOutputs("outLeft", "outCentre", "outRight").build(); ComputationGraph expectedModel = new ComputationGraph(expectedConf); expectedModel.init(); - assertEquals(expectedConf.toJson(), modelSubset.getConfiguration().toJson()); + assertEquals(expectedConf.toJson(), modelSubset.getComputationGraphConfiguration().toJson()); } @Test diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java index 5b7bec134..73e1a7a56 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java @@ -764,7 +764,7 @@ public class TestOptimizers extends BaseDL4JTest { } - /** Simple abstract class to deal with the fact that we don't care about the majority of the Model/Layer + /** Simple abstract class to deal with the fact 
that we don't care about the majority of the Model/ILayer * methods here. Classes extending this model for optimizer tests need only implement the score() and * gradient() methods. */ @@ -907,7 +907,7 @@ public class TestOptimizers extends BaseDL4JTest { @Override public INDArray input() { - //Work-around for BaseUpdater.postApply(): Uses Layer.input().size(0) + //Work-around for BaseUpdater.postApply(): Uses ILayer.input().size(0) //in order to get mini-batch size. i.e., divide by 1 here. return Nd4j.zeros(1); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java index 985f347d8..87a53e54a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java @@ -221,7 +221,7 @@ public class RegressionTest060 extends BaseDL4JTest { ComputationGraph net = ModelSerializer.restoreComputationGraph(f, true); - ComputationGraphConfiguration conf = net.getConfiguration(); + ComputationGraphConfiguration conf = net.getComputationGraphConfiguration(); assertEquals(3, conf.getVertices().size()); GravesLSTM l0 = (GravesLSTM) ((LayerVertex) conf.getVertices().get("0")).getLayerConf().getLayer(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java index 2a75e7994..0dc3839bb 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java @@ -221,7 +221,7 @@ public class RegressionTest071 extends BaseDL4JTest { ComputationGraph net = ModelSerializer.restoreComputationGraph(f, true); - ComputationGraphConfiguration conf = net.getConfiguration(); + ComputationGraphConfiguration conf = net.getComputationGraphConfiguration(); assertEquals(3, conf.getVertices().size()); GravesLSTM l0 = (GravesLSTM) ((LayerVertex) conf.getVertices().get("0")).getLayerConf().getLayer(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java index 6566f03fe..6460582ba 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java @@ -237,7 +237,7 @@ public class RegressionTest080 extends BaseDL4JTest { ComputationGraph net = ModelSerializer.restoreComputationGraph(f, true); - ComputationGraphConfiguration conf = net.getConfiguration(); + ComputationGraphConfiguration conf = net.getComputationGraphConfiguration(); assertEquals(3, conf.getVertices().size()); GravesLSTM l0 = (GravesLSTM) ((LayerVertex) conf.getVertices().get("0")).getLayerConf().getLayer(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java index acee54871..f294e16a7 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java @@ -171,7 +171,7 @@ public class RegressionTest100a extends BaseDL4JTest { int nBoxes = 5; int nClasses = 10; - ConvolutionLayer cl = (ConvolutionLayer)((LayerVertex)net.getConfiguration().getVertices().get("convolution2d_9")).getLayerConf().getLayer(); + ConvolutionLayer cl = (ConvolutionLayer)((LayerVertex)net.getComputationGraphConfiguration().getVertices().get("convolution2d_9")).getLayerConf().getLayer(); assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); assertEquals(new ActivationIdentity(), cl.getActivationFn()); assertEquals(ConvolutionMode.Same, cl.getConvolutionMode()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java index 8df2f258b..35fb7391b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java @@ -206,7 +206,7 @@ public class RegressionTest100b3 extends BaseDL4JTest { int nBoxes = 5; int nClasses = 10; - ConvolutionLayer cl = (ConvolutionLayer)((LayerVertex)net.getConfiguration().getVertices().get("convolution2d_9")).getLayerConf().getLayer(); + ConvolutionLayer cl = (ConvolutionLayer)((LayerVertex)net.getComputationGraphConfiguration().getVertices().get("convolution2d_9")).getLayerConf().getLayer(); assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); assertEquals(new ActivationIdentity(), cl.getActivationFn()); assertEquals(ConvolutionMode.Same, cl.getConvolutionMode()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java index 5b4270a4e..00e46bf0c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java @@ -224,7 +224,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { int nBoxes = 5; int nClasses = 10; - ConvolutionLayer cl = (ConvolutionLayer) ((LayerVertex) net.getConfiguration().getVertices() + ConvolutionLayer cl = (ConvolutionLayer) ((LayerVertex) net.getComputationGraphConfiguration().getVertices() .get("convolution2d_9")).getLayerConf().getLayer(); assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); assertEquals(new ActivationIdentity(), cl.getActivationFn()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java index 40df45924..15a9c2bc3 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java @@ -205,7 +205,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { int nBoxes = 5; int nClasses = 10; - ConvolutionLayer cl = (ConvolutionLayer) ((LayerVertex) net.getConfiguration().getVertices() + ConvolutionLayer cl = (ConvolutionLayer) ((LayerVertex) net.getComputationGraphConfiguration().getVertices() .get("convolution2d_9")).getLayerConf().getLayer(); assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); 
assertEquals(new ActivationIdentity(), cl.getActivationFn()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java index 00a2b6242..acb3963b1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java @@ -94,7 +94,7 @@ public class CustomLayer extends FeedForwardLayer { @Override public ParamInitializer initializer() { //This method returns the parameter initializer for this type of layer - //In this case, we can use the DefaultParamInitializer, which is the same one used for DenseLayer + //In this case, we can use the DefaultParamInitializer, which is the same one used for DenseLayerConfiguration //For more complex layers, you may need to implement a custom parameter initializer //See the various parameter initializers here: //https://github.com/deeplearning4j/deeplearning4j/tree/master/deeplearning4j-core/src/main/java/org/deeplearning4j/nn/params @@ -108,7 +108,7 @@ public class CustomLayer extends FeedForwardLayer { //If you don't need this functionality for your custom layer, you can return a LayerMemoryReport // with all 0s, or - //This implementation: based on DenseLayer implementation + //This implementation: based on DenseLayerConfiguration implementation InputType outputType = getOutputType(-1, inputType); val numParams = initializer().numParams(this); @@ -131,7 +131,7 @@ public class CustomLayer extends FeedForwardLayer { .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, - MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer + MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayerConfiguration .build(); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java index 4da9883b8..8bfaa9eb2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java @@ -117,7 +117,7 @@ public class CrashReportingUtilTest extends BaseDL4JTest { String str = FileUtils.readFileToString(list[0]); // System.out.println(str); assertTrue(str.contains("Network Information")); - assertTrue(str.contains("Layer Helpers")); + assertTrue(str.contains("ILayer Helpers")); assertTrue(str.contains("JavaCPP")); assertTrue(str.contains("ScoreIterationListener")); @@ -134,7 +134,7 @@ public class CrashReportingUtilTest extends BaseDL4JTest { assertEquals(1, list.length); str = FileUtils.readFileToString(list[0]); assertTrue(str.contains("Network Information")); - assertTrue(str.contains("Layer Helpers")); + assertTrue(str.contains("ILayer Helpers")); assertTrue(str.contains("JavaCPP")); assertTrue(str.contains("ScoreIterationListener(1)")); @@ -150,7 +150,7 @@ public class CrashReportingUtilTest extends BaseDL4JTest { // System.out.println("///////////////////////////////////////////////////////////"); assertTrue(mlnMemoryInfo.contains("Network Information")); - assertTrue(mlnMemoryInfo.contains("Layer Helpers")); + assertTrue(mlnMemoryInfo.contains("ILayer Helpers")); 
assertTrue(mlnMemoryInfo.contains("JavaCPP")); assertTrue(mlnMemoryInfo.contains("ScoreIterationListener(1)")); @@ -172,7 +172,7 @@ public class CrashReportingUtilTest extends BaseDL4JTest { assertEquals(1, list.length); str = FileUtils.readFileToString(list[0]); assertTrue(str.contains("Network Information")); - assertTrue(str.contains("Layer Helpers")); + assertTrue(str.contains("ILayer Helpers")); assertTrue(str.contains("JavaCPP")); assertTrue(str.contains("ScoreIterationListener(1)")); @@ -187,7 +187,7 @@ public class CrashReportingUtilTest extends BaseDL4JTest { assertEquals(1, list.length); str = FileUtils.readFileToString(list[0]); assertTrue(str.contains("Network Information")); - assertTrue(str.contains("Layer Helpers")); + assertTrue(str.contains("ILayer Helpers")); assertTrue(str.contains("JavaCPP")); assertTrue(str.contains("ScoreIterationListener(1)")); @@ -203,7 +203,7 @@ public class CrashReportingUtilTest extends BaseDL4JTest { // System.out.println("///////////////////////////////////////////////////////////"); assertTrue(cgMemoryInfo.contains("Network Information")); - assertTrue(cgMemoryInfo.contains("Layer Helpers")); + assertTrue(cgMemoryInfo.contains("ILayer Helpers")); assertTrue(cgMemoryInfo.contains("JavaCPP")); assertTrue(cgMemoryInfo.contains("ScoreIterationListener(1)")); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java index 610cb0961..e01d42f01 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java @@ -151,7 +151,7 @@ public class ModelSerializerTest extends BaseDL4JTest { ComputationGraph network = ModelSerializer.restoreComputationGraph(tempFile); - assertEquals(network.getConfiguration().toJson(), cg.getConfiguration().toJson()); + assertEquals(network.getComputationGraphConfiguration().toJson(), cg.getComputationGraphConfiguration().toJson()); assertEquals(cg.params(), network.params()); assertEquals(cg.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); } @@ -177,7 +177,7 @@ public class ModelSerializerTest extends BaseDL4JTest { ComputationGraph network = ModelSerializer.restoreComputationGraph(fis); - assertEquals(network.getConfiguration().toJson(), cg.getConfiguration().toJson()); + assertEquals(network.getComputationGraphConfiguration().toJson(), cg.getComputationGraphConfiguration().toJson()); assertEquals(cg.params(), network.params()); assertEquals(cg.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); } diff --git a/cavis-dnn/cavis-dnn-cudnn/src/main/java/org/deeplearning4j/cuda/recurrent/CudnnLSTMHelper.java b/cavis-dnn/cavis-dnn-cudnn/src/main/java/org/deeplearning4j/cuda/recurrent/CudnnLSTMHelper.java index 120078d07..2b71d920a 100644 --- a/cavis-dnn/cavis-dnn-cudnn/src/main/java/org/deeplearning4j/cuda/recurrent/CudnnLSTMHelper.java +++ b/cavis-dnn/cavis-dnn-cudnn/src/main/java/org/deeplearning4j/cuda/recurrent/CudnnLSTMHelper.java @@ -198,7 +198,7 @@ public class CudnnLSTMHelper extends BaseCudnnHelper implements LSTMHelper { } if (!(activationFn instanceof ActivationTanH)) { supported = false; - log.warn("Not supported: Layer activation functions != ActivationTanH"); + log.warn("Not supported: ILayer activation functions != ActivationTanH"); } if (hasPeepholeConnections) { supported = false; diff --git 
a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java index 5c8c829c4..601237b53 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java @@ -295,7 +295,7 @@ public class KerasLayer { } /** - * Copy Keras layer weights to DL4J Layer. + * Copy Keras layer weights to DL4J ILayer. * * @param layer DL4J layer * @throws InvalidKerasConfigurationException Invalid Keras configuration @@ -358,7 +358,7 @@ public class KerasLayer { } /** - * Whether this Keras layer maps to a DL4J Layer. + * Whether this Keras layer maps to a DL4J ILayer. * * @return true or false */ @@ -367,9 +367,9 @@ public class KerasLayer { } /** - * Gets corresponding DL4J Layer, if any. + * Gets corresponding DL4J ILayer, if any. * - * @return DL4J Layer + * @return DL4J ILayer * @see org.deeplearning4j.nn.api.Layer */ public Layer getLayer() { diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java index d4bf6ba92..ea0b99f0c 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java @@ -583,8 +583,8 @@ public class KerasModel { graphBuilder.addVertex(layer.getLayerName(), layer.getVertex(), inboundLayerNamesArray); } else if (layer.isInputPreProcessor()) { if (preprocessor == null) - throw new UnsupportedKerasConfigurationException("Layer " + layer.getLayerName() - + " could not be mapped to Layer, Vertex, or InputPreProcessor"); + throw new UnsupportedKerasConfigurationException("ILayer " + layer.getLayerName() + + " could not be mapped to ILayer, Vertex, or InputPreProcessor"); graphBuilder.addVertex(layer.getLayerName(), new PreprocessorVertex(preprocessor), inboundLayerNamesArray); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java index a0082f4f1..d454d1e97 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/config/KerasLayerConfiguration.java @@ -246,7 +246,7 @@ public class KerasLayerConfiguration { private final String LAYER_FIELD_RATE = "rate"; private final String LAYER_FIELD_GAUSSIAN_VARIANCE = ""; // 1: sigma, 2: stddev - /* Layer wrappers */ + /* ILayer wrappers */ // Missing: TimeDistributed diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java index 9eae1f08e..f49599ccf 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java 
@@ -115,9 +115,9 @@ public class KerasDense extends KerasLayer { } /** - * Get DL4J DenseLayer. + * Get DL4J DenseLayerConfiguration. * - * @return DenseLayer + * @return DenseLayerConfiguration */ public DenseLayer getDenseLayer() { return (DenseLayer) this.layer; diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java index 4e35a6867..e1c6be765 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java @@ -211,10 +211,10 @@ public class KerasLSTM extends KerasLayer { } /** - * Get DL4J Layer. If returnSequences is true, this can be casted to an "LSTM" layer, otherwise it can be casted + * Get DL4J ILayer. If returnSequences is true, this can be casted to an "LSTM" layer, otherwise it can be casted * to a "LastTimeStep" layer. * - * @return LSTM Layer + * @return LSTM ILayer */ public Layer getLSTMLayer() { return layer; diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java index ac2d4c234..ea71fc8d7 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java @@ -184,7 +184,7 @@ public class KerasSimpleRnn extends KerasLayer { /** * Get DL4J SimpleRnn layer. * - * @return SimpleRnn Layer + * @return SimpleRnn ILayer */ public Layer getSimpleRnnLayer() { return this.layer; diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java index fa5f5b508..ccbbbd9d6 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java @@ -160,7 +160,7 @@ public class KerasBidirectional extends KerasLayer { /** * Return the underlying recurrent layer of this bidirectional layer * - * @return Layer, recurrent layer + * @return ILayer, recurrent layer */ public Layer getUnderlyingRecurrentLayer() { return kerasRnnlayer.getLayer(); @@ -169,7 +169,7 @@ public class KerasBidirectional extends KerasLayer { /** * Get DL4J Bidirectional layer. 
* - * @return Bidirectional Layer + * @return Bidirectional ILayer */ public Bidirectional getBidirectionalLayer() { return (Bidirectional) this.layer; diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java index 1120dfbb8..f50df5084 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java @@ -85,7 +85,7 @@ public class FullModelComparisons extends BaseDL4JTest { System.out.println(model.summary()); - // 1. Layer + // 1. ILayer LSTM firstLstm = (LSTM) model.getLayer(0); org.deeplearning4j.nn.conf.layers.LSTM firstConf = (org.deeplearning4j.nn.conf.layers.LSTM) firstLstm.conf().getLayer(); @@ -123,7 +123,7 @@ public class FullModelComparisons extends BaseDL4JTest { Assertions.assertEquals(b.getDouble(0, 192), -0.13569744, 1e-7); // Keras O Assertions.assertEquals(b.getDouble(0, 0), -0.2587392, 1e-7); // Keras C - // 2. Layer + // 2. ILayer LSTM secondLstm = (LSTM) ((LastTimeStepLayer) model.getLayer(1)).getUnderlying(); org.deeplearning4j.nn.conf.layers.LSTM secondConf = (org.deeplearning4j.nn.conf.layers.LSTM) secondLstm.conf().getLayer(); diff --git a/cavis-dnn/cavis-dnn-nn-api/src/main/java/net/brutex/ai/dnn/api/LayerConfiguration.java b/cavis-dnn/cavis-dnn-nn-api/src/main/java/net/brutex/ai/dnn/api/LayerConfiguration.java index 0b274cb8c..6b395a5b2 100644 --- a/cavis-dnn/cavis-dnn-nn-api/src/main/java/net/brutex/ai/dnn/api/LayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn-api/src/main/java/net/brutex/ai/dnn/api/LayerConfiguration.java @@ -39,4 +39,13 @@ public interface LayerConfiguration { */ org.deeplearning4j.nn.conf.inputs.InputType.Type getInputType(); + + /** + * Defines the valid input type for this Layer + * + * @return InputType + */ + org.deeplearning4j.nn.conf.inputs.InputType.Type getOutputType(); + + } diff --git a/cavis-dnn/cavis-dnn-nn/build.gradle b/cavis-dnn/cavis-dnn-nn/build.gradle index e0f85570d..0e097093d 100644 --- a/cavis-dnn/cavis-dnn-nn/build.gradle +++ b/cavis-dnn/cavis-dnn-nn/build.gradle @@ -22,7 +22,7 @@ apply from: "${project.rootProject.projectDir}/createTestBackends.gradle" dependencies { implementation platform(projects.cavisCommonPlatform) - implementation projects.cavisDnn.cavisDnnNnApi +// implementation projects.cavisDnn.cavisDnnNnApi implementation projects.cavisDnn.cavisDnnData.cavisDnnDataUtilityIterators implementation 'org.lucee:oswego-concurrent:1.3.4' implementation projects.cavisDnn.cavisDnnCommon @@ -57,4 +57,6 @@ dependencies { // define any required OkHttp artifacts without version implementation "com.squareup.okhttp3:okhttp" implementation "com.squareup.okhttp3:logging-interceptor" -} \ No newline at end of file +} +sourceCompatibility = JavaVersion.VERSION_11 +targetCompatibility = JavaVersion.VERSION_11 diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/LayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayer.java similarity index 60% rename from cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/LayerConfiguration.java rename to cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayer.java index 16c67b491..a43b94265 100644 
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/LayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayer.java @@ -19,10 +19,28 @@ * */ -package net.brutex.ai.dnn.conf.layer; +package net.brutex.ai.dnn.api; -public abstract class LayerConfiguration { +/** + * This is an "executable" ILayer, that is based on a {@link ILayerConfiguration} + */ +public interface ILayer { + /** + * Get the underlying configuration for this ILayer + * @return configuration + */ + ILayerConfiguration getLayerConfiguration(); + /** + * Set the underlying layer configuration + * @param conf The new configuration + */ + void setLayerConfiguration(ILayerConfiguration conf); + /** + * An implementation should provide a method to validate the network + * @return true if no errors found; false otherwise + */ + boolean isValid(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/FFLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java similarity index 56% rename from cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/FFLayer.java rename to cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java index d903e9002..e0f5d856b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/FFLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java @@ -19,34 +19,45 @@ * */ -package net.brutex.ai.dnn.conf.layer; - -import lombok.extern.slf4j.Slf4j; -import net.brutex.ai.dnn.api.Layer; -import net.brutex.ai.dnn.api.NeuralNetwork; -import net.brutex.ai.dnn.conf.layer.AbstractLayerConfiguration; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.inputs.InputType.Type; - -@Slf4j -public class FFLayer extends AbstractLayerConfiguration { +package net.brutex.ai.dnn.api; +public interface ILayerConfiguration { /** - * Create and return an instance of a LayerConfiguration. + * Create and return an instance of a ILayerConfiguration. 
* * @param network the "holding" network for the instance * @return the new layer instance */ - @Override - public Layer instantiate(NeuralNetwork network) { - //Let's do some verifications first - if(getInputType() != Type.FF) { - log.error("The {} layer configuration must use an InputType of {}, but found {}", - this.getClass().getSimpleName(), - Type.FF.name(), - getInputType().name()); - } - return null; - } + ILayer instantiate(IModel network); + + + /** + * Defines the valid input type for this ILayer + * + * @return InputType + */ + org.deeplearning4j.nn.conf.inputs.InputType.Type getInputType(); + + + /** + * Defines the valid input type for this ILayer + * + * @return InputType + */ + org.deeplearning4j.nn.conf.inputs.InputType.Type getOutputType(); + + + /** + * Number of trainable parameter in this layer + * @return number of parameter + */ + long numParameters(); + + /** + * An implementation should provide a method to validate the network + * @return true if no errors found; false otherwise + */ + boolean isValid(); + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java new file mode 100644 index 000000000..f0c6a722a --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java @@ -0,0 +1,86 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.api; + +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.dataset.api.DataSet; +import org.nd4j.linalg.dataset.api.MultiDataSet; + +/** + * A Neural Network is an instance of a {@link INeuralNetworkConfiguration}, that can be trained, + * evaluated, saved, exported, etc. Its configuration state is defined with the + * {@link #setConfiguration(INeuralNetworkConfiguration)} and {@link #getConfiguration()} methods. + * + */ +public interface IModel { + + /** + * The configuration that defines this Neural Network + * + * @param conf the configuration to use for this network + */ + void setConfiguration(INeuralNetworkConfiguration conf); + INeuralNetworkConfiguration getConfiguration(); + + /** + * Fit the model for one iteration on the provided data + * + * @param features the examples to classify (one example in each row) + * @param labels the example labels(a binary outcome matrix) + * @param featuresMask The mask array for the features (used for variable length time series, etc). May be null. + * @param labelsMask The mask array for the labels (used for variable length time series, etc). May be null. 
+ */ + void fit(INDArray features, INDArray labels, INDArray featuresMask, INDArray labelsMask); + + /** + * This method fits model with a given DataSet + * + * @param dataSet the dataset to use for training + */ + void fit(DataSet dataSet); + + /** + * This method fits model with a given MultiDataSet + * + * @param dataSet the multi dataset to use for training + */ + void fit(MultiDataSet dataSet); + + /** + * The name of the Neural Network + * @return the name + */ + String getName(); + + /** + * Set the name for this Neural Network + * @param name the name + */ + void setName(String name); + + /** + * An implementation should provide a method to validate the network + * @return true if no errors found; false otherwise + */ + boolean isValid(); + +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/NeuralNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetwork.java similarity index 58% rename from cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/NeuralNetwork.java rename to cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetwork.java index c9437b838..48d6c561b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/NeuralNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetwork.java @@ -1,25 +1,27 @@ /* - * ****************************************************************************** - * * - * * - * * This program and the accompanying materials are made available under the - * * terms of the Apache License, Version 2.0 which is available at - * * https://www.apache.org/licenses/LICENSE-2.0. - * * - * * See the NOTICE file distributed with this work for additional - * * information regarding copyright ownership. - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * * License for the specific language governing permissions and limitations - * * under the License. - * * - * * SPDX-License-Identifier: Apache-2.0 - * ***************************************************************************** + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. 
+ * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * */ -package org.deeplearning4j.nn.api; +package net.brutex.ai.dnn.api; +import net.brutex.ai.dnn.conf.NeuralNetworkConfiguration; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.nd4j.evaluation.IEvaluation; import org.nd4j.linalg.api.ndarray.INDArray; @@ -31,7 +33,7 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; /** * @author raver119 */ -public interface NeuralNetwork { +public interface INeuralNetwork { /** * This method does initialization of model @@ -104,4 +106,17 @@ public interface NeuralNetwork { * @param iterator */ T[] doEvaluation(MultiDataSetIterator iterator, T... evaluations); + + /** + * A neural network is created from a configuration. + * @param conf the configuration to create the network from + */ + void setConfiguration(NeuralNetworkConfiguration conf); + + /** + * Return the configuration for this configuration + * @return + */ + NeuralNetworkConfiguration getConfiguration(); + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java new file mode 100644 index 000000000..81d447fa3 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java @@ -0,0 +1,52 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.api; + +import java.util.List; + +public interface INeuralNetworkConfiguration { + +} +/** + /** + * Provides a flat list of all embedded layer configurations, this + * can only be called after the layer is initialized or {@link #getLayerConfigurations()} is + * called. 
+ * + * @return unstacked layer configurations + + List getLayerConfigurations(); + + + /** + * This uncollables any stacked layer configurations within building blocks like + * @link BuildingBlockLayer} + + void calculateInnerLayerConfigurations(); + + /** + * An implementation should provide a method to validate the network + * @return true if no errors found; false otherwise + + boolean isValid(); +} +**/ \ No newline at end of file diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/NeuralNetworkConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/NeuralNetworkConfiguration.java index e383ea9c7..51de9f873 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/NeuralNetworkConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/NeuralNetworkConfiguration.java @@ -22,32 +22,61 @@ package net.brutex.ai.dnn.conf; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.exc.InvalidTypeIdException; +import com.fasterxml.jackson.databind.node.ArrayNode; +import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Random; import lombok.Getter; import lombok.NonNull; import lombok.Setter; import lombok.Singular; import lombok.extern.jackson.Jacksonized; import lombok.extern.slf4j.Slf4j; -import net.brutex.ai.dnn.api.LayerConfiguration; +import net.brutex.ai.dnn.api.ILayerConfiguration; +import net.brutex.ai.dnn.api.INeuralNetworkConfiguration; import org.deeplearning4j.nn.conf.BackpropType; import org.deeplearning4j.nn.conf.CacheMode; +import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.WorkspaceMode; +import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.wrapper.BuildingBlockLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; +import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.conf.memory.MemoryReport; +import org.deeplearning4j.nn.conf.memory.NetworkMemoryReport; +import org.deeplearning4j.nn.conf.serde.JsonMappers; +import org.deeplearning4j.nn.weights.IWeightInit; +import org.deeplearning4j.nn.weights.WeightInit; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.activations.IActivation; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.learning.config.IUpdater; +import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT; +import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; +import org.nd4j.linalg.lossfunctions.impl.LossMSE; +import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; /** - * The NeuralNetworkConfiguration is a sequential container for the different layers in your + * The INeuralNetworkConfiguration is a sequential container for the different layers in your * network (or other NeuralNetworkConfigurations). 
That said, NeuralNetworkConfigurations can be * stacked.

- * It then “chains” outputs to inputs sequentially for each NeuralNetworkConfiguration, + * It then “chains” outputs to inputs sequentially for each INeuralNetworkConfiguration, * finally returning the output of the "top" configuration. Any settings made, are inherited and can - * be overridden on a "deeper" level. For this use case, you need to wrap the NeuralNetworkConfiguration + * be overridden on a "deeper" level. For this use case, you need to wrap the INeuralNetworkConfiguration * into a BuildingBlockLayer * */ @@ -55,77 +84,54 @@ import org.deeplearning4j.nn.conf.layers.wrapper.BuildingBlockLayer; @JsonIgnoreProperties(ignoreUnknown = true) @lombok.Builder @Slf4j -public class NeuralNetworkConfiguration implements net.brutex.ai.dnn.api.NeuralNetworkConfiguration, Serializable, Cloneable { - - /** - * The default {@link CacheMode} for this configuration. Will be set to "NONE" if not specified otherwise. - * Valid values are
- * CacheMode.NONE,
- * CacheMode.HOST or
- * CacheMode.DEVICE
- */ - @NonNull - @lombok.Builder.Default private CacheMode cacheMode = CacheMode.NONE; - - @Getter @Setter @NonNull - protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.ENABLED; - - @Getter @Setter @NonNull - protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.ENABLED; - - @Getter @Setter @NonNull - protected BackpropType backpropType = BackpropType.Standard; - - @Getter - protected Map inputPreProcessors = new HashMap<>(); - - - @Getter @Setter protected int tbpttFwdLength = 20; - @Getter @Setter protected int tbpttBackLength = 20; - - - /** - * The list of layer configurations in this configuration. They will be indexed automatically - * as the layers get added starting with index 0. - */ - @Singular @Getter - private List layerConfigurations; - - /** - * The name for this configuration. Defaults to "Anonymous NeuralNetworkConfiguration" if - * it is not specified. - */ - @lombok.Builder.Default @Getter - private String name = "Anonymous NeuralNetworkConfiguration"; - - - /** - * The {@link InputType} of the data for this network configuration - */ - private InputType inputType; +public class NeuralNetworkConfiguration extends NeuralNetConfiguration implements + INeuralNetworkConfiguration, Serializable, Cloneable { + private static final int DEFAULT_TBPTT_LENGTH = 20; + @Getter protected final List confs = new ArrayList<>(); /** * hidden list of layers, that "flattens" all the layers of this network and applies * inheritance. */ @lombok.Builder.ObtainVia(method = "calculateInnerLayers") - private final List innerLayerConfigurations; - - @Override - public void calculateInnerLayerConfigurations() { - List list = new ArrayList<>(); - for( LayerConfiguration layer : this.layerConfigurations) { - if(layer instanceof BuildingBlockLayer) { - BuildingBlockLayer blayer = (BuildingBlockLayer) layer; - blayer.getConf().calculateInnerLayerConfigurations(); - list.addAll(blayer.getConf().getLayerConfigurations()); - } else { - list.add(layer); - } - } - this.layerConfigurations = list; - } - + private final List innerLayerConfigurations; + @Getter @Setter @NonNull @Singular + protected List layers = new ArrayList<>(); + @Getter @Setter @NonNull @lombok.Builder.Default @Deprecated + protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.ENABLED; + @Getter @Setter @NonNull @lombok.Builder.Default @Deprecated + protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.ENABLED; + /** + * The type of backprop. Default setting is used for most networks (MLP, CNN etc), but + * optionally truncated BPTT can be used for training recurrent neural networks. If using + * TruncatedBPTT make sure you set both tBPTTForwardLength() and tBPTTBackwardLength() + */ + @Getter @Setter @NonNull @lombok.Builder.Default + protected BackpropType backpropType = BackpropType.Standard; + @Getter + protected Map inputPreProcessors = new HashMap<>(); + /** + * When doing truncated BPTT: how many steps of forward pass should we do before doing + * (truncated) backprop?
Only applicable when doing + * backpropType(BackpropType.TruncatedBPTT)
Typically the tBPTTForwardLength parameter is the same + * as the tBPTTBackwardLength parameter, but may be larger than it in some circumstances (but + * never smaller)
Ideally your training data time series length should be divisible by this. + * This is the k1 parameter on pg23 of + * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf + * + * @param forwardLength Forward length > 0, >= backwardLength + */ + @Getter @Setter protected int tbpttFwdLength = 20; + /** + * When doing truncated BPTT: how many steps of backward should we do?
Only applicable when + * doing backpropType(BackpropType.TruncatedBPTT)
This is the k2 parameter on pg23 of + * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf + * + * @param backwardLength <= forwardLength + */ + @Getter @Setter protected int tbpttBackLength = 20; /** * Creates and returns a copy of this object. * @@ -136,8 +142,564 @@ public class NeuralNetworkConfiguration implements net.brutex.ai.dnn.api.NeuralN * cannot be cloned. * @see Cloneable */ - @Override - protected Object clone() throws CloneNotSupportedException { - return super.clone(); + + //Nd4j.getRandom().setSeed(getConf(0).getSeed()); //TODO + //Counter for the number of parameter updates so far + // This is important for learning rate schedules, for example, and is stored here to ensure it is persisted + // for Spark and model serialization + @Getter @Setter + protected int iterationCount = 0; + //Counter for the number of epochs completed so far. Used for per-epoch schedules + @Getter @Setter + protected int epochCount = 0; + protected double dampingFactor = 100; + @Getter @Setter //todo why? + private Layer layer; + /** + * A seed for this network, will be random if not specified. + */ + @Getter @Setter @NonNull @lombok.Builder.Default + private long seed = new Random().nextLong(); + /** + * The default {@link CacheMode} for this configuration. Will be set to "NONE" if not specified otherwise. + * This method defines how/if preOutput cache is handled: NONE: cache disabled (default value) + * HOST: Host memory will be used DEVICE: GPU memory will be used (on CPU backends effect will + * be the same as for HOST) + * + * Valid values are
+ * CacheMode.NONE,
+ * CacheMode.HOST or
+ * CacheMode.DEVICE
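For orientation, a minimal usage sketch of the Lombok-generated builder over the fields declared above; the builder method names are assumed to mirror the field names (cacheMode, backpropType, tbpttFwdLength, ...) and the values are illustrative only:

    NeuralNetworkConfiguration conf = NeuralNetworkConfiguration.builder()
            .name("example-net")
            .seed(42L)
            .cacheMode(CacheMode.HOST)                  // keep the preOutput cache in host memory
            .backpropType(BackpropType.TruncatedBPTT)
            .tbpttFwdLength(20)                         // k1: forward steps per truncated-BPTT update
            .tbpttBackLength(20)                        // k2: backward steps, never larger than k1
            .build();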
+ * @param cacheMode + */ + @NonNull @Getter @Setter + @lombok.Builder.Default private CacheMode cacheMode = CacheMode.NONE; + /** + * The list of layer configurations in this configuration. They will be indexed automatically + * as the layers get added starting with index 0. + */ + @Singular @Getter + private List layerConfigurations; + /** + * The name for this configuration. Defaults to "Anonymous INeuralNetworkConfiguration" if + * it is not specified. + */ + @lombok.Builder.Default @Getter + private String name = "Anonymous INeuralNetworkConfiguration"; + /** + * The {@link InputType} of the data for this network configuration + */ + private InputType inputType; + /** + * Set the DataType for the network parameters and activations for all layers in the network. + * Default: Float + * + * @param dataType Datatype to use for parameters and activations + */ + @Getter @Setter @lombok.Builder.Default @NonNull + private DataType dataType = DataType.FLOAT; + /** + * Whether to override the nIn configuration forcibly upon construction. Default value is true. + * @return builder pattern + */ + @Getter @Setter + @lombok.Builder.Default + private boolean overrideNinUponBuild = true; + /** + * Enabled by default. If enabled, the output layer configuration will be validated, to throw an + * exception on likely invalid outputs - such as softmax + nOut=1, or LossMCXENT + Tanh.
If + * disabled (false) no output layer validation will be performed.
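As a concrete illustration of the kind of configuration this check is meant to catch (a hypothetical, standalone snippet using the standard DL4J layer builder; OutputLayer, LossFunctions and Activation are assumed to be imported):

    // Softmax over a single output unit always produces 1.0, so the layer cannot learn anything;
    // output-layer validation rejects this unless validateOutputLayerConfig is set to false.
    OutputLayer suspicious = new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
            .nIn(10)
            .nOut(1)
            .activation(Activation.SOFTMAX)
            .build();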
Disabling this validation + * is not recommended, as the configurations that fail validation usually will not be able to + * learn correctly. However, the option to disable this validation is provided for advanced + * users when creating non-standard architectures. + * + * @param validate If true: validate output layer configuration. False: don't validate + */ + @Getter @Setter @lombok.Builder.Default + private boolean validateOutputLayerConfig=true; + /** + * Enabled by default. If enabled, an exception will be thrown when using the (invalid) + * combination of truncated backpropagation through time (TBPTT) with either a + * GlobalPoolingLayer or LastTimeStepLayer.
It is possible to disable this validation to + * allow what is almost certainly an invalid configuration to be used, however this is not + * recommended. + * + * @param validate Whether TBPTT validation should be performed + */ + @Getter @Setter @lombok.Builder.Default + private boolean validateTbpttConfig=true; + + + + /** + * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam} + * or {@link org.nd4j.linalg.learning.config.Nesterovs}
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param updater Updater to use + */ + @Getter @Setter @NonNull + private IUpdater updater; + + /** + * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping etc. + * See {@link GradientNormalization} for details
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param gradientNormalization Type of normalization to use. Defaults to None. + * @see GradientNormalization + */ + @Getter @Setter @NonNull @lombok.Builder.Default + private GradientNormalization gradientNormalization = GradientNormalization.None; + + /** + * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, + * GradientNormalization.ClipL2PerParamType, and GradientNormalization.ClipElementWiseAbsoluteValue
+ * Not used otherwise.
+ * L2 threshold for first two types of clipping, or absolute value threshold for last type of clipping.
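For example, element-wise gradient clipping at an absolute value of 1.0 would look roughly like this (builder method names again assumed to mirror the field names; the threshold value is illustrative):

    NeuralNetworkConfiguration clipped = NeuralNetworkConfiguration.builder()
            .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
            .gradientNormalizationThreshold(1.0)
            .build();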
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + */ + @Getter @Setter + private double gradientNormalizationThreshold; + + + /** + * Weight initialization scheme to use, for initial weight values + * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + */ + @Getter @Setter + private IWeightInit weightInit; + + /** + * Activation function / neuron non-linearity
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + */ + @Getter @Setter + private IActivation activation; + + + + /** + * Create a neural net configuration from json + * + * @param json the neural net configuration from json + * @return {@link NeuralNetworkConfiguration} + */ + public static NeuralNetworkConfiguration fromJson(String json) { + NeuralNetworkConfiguration conf; + ObjectMapper mapper = NeuralNetworkConfiguration.mapper(); + try { + conf = mapper.readValue(json, NeuralNetworkConfiguration.class); + } catch (InvalidTypeIdException e) { + if (e.getMessage().contains("@class")) { + try { + //JSON may be legacy (1.0.0-alpha or earlier), attempt to load it using old format + return JsonMappers.getLegacyMapper().readValue(json, NeuralNetworkConfiguration.class); + } catch (InvalidTypeIdException e2) { + //Check for legacy custom layers: "Could not resolve type id 'CustomLayer' as a subtype of [simple type, class org.deeplearning4j.nn.conf.layers.ILayer]: known type ids = [Bidirectional, CenterLossOutputLayer, CnnLossLayer, ..." + //1.0.0-beta5: dropping support for custom layers defined in pre-1.0.0-beta format. Built-in layers from these formats still work + String msg = e2.getMessage(); + if (msg != null && msg.contains("Could not resolve type id")) { + throw new RuntimeException( + "Error deserializing MultiLayerConfiguration - configuration may have a custom " + + "layer, vertex or preprocessor, in pre version 1.0.0-beta JSON format.\nModels in legacy format with custom" + + + " layers should be loaded in 1.0.0-beta to 1.0.0-beta4 and saved again, before loading in the current version of DL4J", + e); + } + throw new RuntimeException(e2); + } catch (IOException e2) { + throw new RuntimeException(e2); + } + } + throw new RuntimeException(e); + } catch (IOException e) { + //Check if this exception came from legacy deserializer... + String msg = e.getMessage(); + if (msg != null && msg.contains("legacy")) { + throw new RuntimeException( + "Error deserializing MultiLayerConfiguration - configuration may have a custom " + + "layer, vertex or preprocessor, in pre version 1.0.0-alpha JSON format. These layers can be " + + + "deserialized by first registering them with NeuralNetConfiguration.registerLegacyCustomClassesForJSON(Class...)", + e); + } + throw new RuntimeException(e); + } + + //To maintain backward compatibility after loss function refactoring (configs generated with v0.5.0 or earlier) + // Previously: enumeration used for loss functions. 
Now: use classes + // IN the past, could have only been an OutputLayer or RnnOutputLayer using these enums + int layerCount = 0; + JsonNode confs = null; + for (NeuralNetworkConfiguration nnc : conf.getConfs()) { + Layer l = nnc.getLayer(); + if (l instanceof BaseOutputLayer && ((BaseOutputLayer) l).getLossFn() == null) { + //lossFn field null -> may be an old config format, with lossFunction field being for the enum + //if so, try walking the JSON graph to extract out the appropriate enum value + + BaseOutputLayer ol = (BaseOutputLayer) l; + try { + JsonNode jsonNode = mapper.readTree(json); + if (confs == null) { + confs = jsonNode.get("confs"); + } + if (confs instanceof ArrayNode) { + ArrayNode layerConfs = (ArrayNode) confs; + JsonNode outputLayerNNCNode = layerConfs.get(layerCount); + if (outputLayerNNCNode == null) { + throw new RuntimeException("should never happen"); //return conf; //Should never happen... + } + JsonNode outputLayerNode = outputLayerNNCNode.get("layer"); + + JsonNode lossFunctionNode = null; + if (outputLayerNode.has("output")) { + lossFunctionNode = outputLayerNode.get("output").get("lossFunction"); + } else if (outputLayerNode.has("rnnoutput")) { + lossFunctionNode = outputLayerNode.get("rnnoutput").get("lossFunction"); + } + + if (lossFunctionNode != null) { + String lossFunctionEnumStr = lossFunctionNode.asText(); + LossFunctions.LossFunction lossFunction = null; + try { + lossFunction = LossFunctions.LossFunction.valueOf(lossFunctionEnumStr); + } catch (Exception e) { + log.warn( + "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON", + e); + } + + if (lossFunction != null) { + switch (lossFunction) { + case MSE: + ol.setLossFn(new LossMSE()); + break; + case XENT: + ol.setLossFn(new LossBinaryXENT()); + break; + case NEGATIVELOGLIKELIHOOD: + ol.setLossFn(new LossNegativeLogLikelihood()); + break; + case MCXENT: + ol.setLossFn(new LossMCXENT()); + break; + + //Remaining: TODO + case SQUARED_LOSS: + case RECONSTRUCTION_CROSSENTROPY: + default: + log.warn( + "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not set loss function for {}", + lossFunction); + break; + } + } + } + + } else { + log.warn( + "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON: layer 'confs' field is not an ArrayNode (is: {})", + (confs != null ? confs.getClass() : null)); + } + } catch (IOException e) { + log.warn( + "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON", + e); + break; + } + } + + //Also, pre 0.7.2: activation functions were Strings ("activationFunction" field), not classes ("activationFn") + //Try to load the old format if necessary, and create the appropriate IActivation instance + if ((l instanceof BaseLayer) && ((BaseLayer) l).getActivationFn() == null) { + try { + JsonNode jsonNode = mapper.readTree(json); + if (confs == null) { + confs = jsonNode.get("confs"); + } + if (confs instanceof ArrayNode) { + ArrayNode layerConfs = (ArrayNode) confs; + JsonNode outputLayerNNCNode = layerConfs.get(layerCount); + if (outputLayerNNCNode == null) { + throw new RuntimeException("Should never happen"); //return conf; //Should never happen... 
+ } + JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); + + if (layerWrapperNode == null || layerWrapperNode.size() != 1) { + continue; + } + + JsonNode layerNode = layerWrapperNode.elements().next(); + JsonNode activationFunction = layerNode.get( + "activationFunction"); //Should only have 1 element: "dense", "output", etc + + if (activationFunction != null) { + IActivation ia = Activation.fromString(activationFunction.asText()) + .getActivationFunction(); + ((BaseLayer) l).setActivationFn(ia); + } + } + + } catch (IOException e) { + log.warn( + "ILayer with null ActivationFn field or pre-0.7.2 activation function detected: could not parse JSON", + e); + } + } + + if (!handleLegacyWeightInitFromJson(json, l, mapper, confs, layerCount)) { + return conf; + } + + layerCount++; + } + return conf; } + + /** + * Handle {@link WeightInit} and {@link Distribution} from legacy configs in Json format. Copied + * from handling of {@link Activation} above. + * + * @return True if all is well and layer iteration shall continue. False else-wise. + */ + private static boolean handleLegacyWeightInitFromJson(String json, Layer l, ObjectMapper mapper, + JsonNode confs, int layerCount) { + if ((l instanceof BaseLayer) && ((BaseLayer) l).getWeightInitFn() == null) { + try { + JsonNode jsonNode = mapper.readTree(json); + if (confs == null) { + confs = jsonNode.get("confs"); + } + if (confs instanceof ArrayNode) { + ArrayNode layerConfs = (ArrayNode) confs; + JsonNode outputLayerNNCNode = layerConfs.get(layerCount); + if (outputLayerNNCNode == null) { + return false; //Should never happen... + } + JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); + + if (layerWrapperNode == null || layerWrapperNode.size() != 1) { + return true; + } + + JsonNode layerNode = layerWrapperNode.elements().next(); + JsonNode weightInit = layerNode.get( + "weightInit"); //Should only have 1 element: "dense", "output", etc + JsonNode distribution = layerNode.get("dist"); + + Distribution dist = null; + if (distribution != null) { + dist = mapper.treeToValue(distribution, Distribution.class); + } + + if (weightInit != null) { + final IWeightInit wi = WeightInit.valueOf(weightInit.asText()) + .getWeightInitFunction(dist); + ((BaseLayer) l).setWeightInitFn(wi); + } + } + + } catch (IOException e) { + log.warn( + "ILayer with null WeightInit detected: " + l.getLayerName() + ", could not parse JSON", + e); + } + } + return true; + + } + + /** + * Object mapper for serialization of configurations + * + * @return + */ + public static ObjectMapper mapperYaml() { + return JsonMappers.getMapperYaml(); + } + + /** + * Object mapper for serialization of configurations + * + * @return + */ + public static ObjectMapper mapper() { + return JsonMappers.getMapper(); + } + + + + /** + * @return JSON representation of NN configuration + */ + public String toYaml() { + ObjectMapper mapper = NeuralNetConfiguration.mapperYaml(); + synchronized (mapper) { + try { + return mapper.writeValueAsString(this); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + throw new RuntimeException(e); + } + } + } + + /** + * @return JSON representation of NN configuration + */ + public String toJson() { + ObjectMapper mapper = NeuralNetConfiguration.mapper(); + synchronized (mapper) { + //JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields occasionally + //when writeValueAsString is used by multiple threads. This results in invalid JSON. 
See issue #3243 + try { + return mapper.writeValueAsString(this); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + throw new RuntimeException(e); + } + } + } + + @Override + public String toString() { + return toJson(); + } + + public NeuralNetworkConfiguration getConf(int i) { + return confs.get(i); + } + + @Override + public NeuralNetworkConfiguration clone() { + + NeuralNetworkConfiguration clone = (NeuralNetworkConfiguration) super.clone(); + List confList = clone.getConfs(); + if (confList != null) { + List list = new ArrayList<>(); + for (NeuralNetworkConfiguration conf : confList) { + list.add(conf.clone()); + } + } + + if (clone.getInputPreProcessors() != null) { + Map map = new HashMap<>(); + for (Map.Entry entry : clone.getInputPreProcessors().entrySet()) { + map.put(entry.getKey(), entry.getValue().clone()); + } + clone.getInputPreProcessors().clear(); + clone.getInputPreProcessors().putAll(map); + } + + clone.setInferenceWorkspaceMode(this.inferenceWorkspaceMode); + clone.setTrainingWorkspaceMode(this.trainingWorkspaceMode); + clone.setCacheMode(this.cacheMode); + clone.setValidateOutputLayerConfig(this.validateOutputLayerConfig); + clone.setDataType(this.dataType); + + return clone; + + } + + public InputPreProcessor getInputPreProcess(int curr) { + return inputPreProcessors.get(curr); + } + + /** + * Get a {@link MemoryReport} for the given MultiLayerConfiguration. This is used to estimate the + * memory requirements for the given network configuration and input + * + * @param inputType Input types for the network + * @return Memory report for the network + */ + public NetworkMemoryReport getMemoryReport(InputType inputType) { + + Map memoryReportMap = new LinkedHashMap<>(); + int nLayers = confs.size(); + for (int i = 0; i < nLayers; i++) { + String layerName = confs.get(i).getLayer().getLayerName(); + if (layerName == null) { + layerName = String.valueOf(i); + } + + //Pass input type through preprocessor, if necessary + InputPreProcessor preproc = getInputPreProcess(i); + //TODO memory requirements for preprocessor + if (preproc != null) { + inputType = preproc.getOutputType(inputType); + } + + LayerMemoryReport report = confs.get(i).getLayer().getMemoryReport(inputType); + memoryReportMap.put(layerName, report); + + inputType = confs.get(i).getLayer().getOutputType(i, inputType); + } + + return new NetworkMemoryReport(memoryReportMap, MultiLayerConfiguration.class, + "MultiLayerNetwork", inputType); + } + + /** + * For the given input shape/type for the network, return a list of activation sizes for each + * layer in the network.
i.e., list.get(i) is the output activation sizes for layer i + * + * @param inputType Input type for the network + * @return A lits of activation types for the network, indexed by layer number + */ + public List getLayerActivationTypes(@NonNull InputType inputType) { + List out = new ArrayList<>(); + int nLayers = confs.size(); + for (int i = 0; i < nLayers; i++) { + InputPreProcessor preproc = getInputPreProcess(i); + if (preproc != null) { + inputType = preproc.getOutputType(inputType); + } + + inputType = confs.get(i).getLayer().getOutputType(i, inputType); + out.add(inputType); + } + return out; + } + + /** + * Defines some additional handy methods. Other than that, + * the builder is generated by lombok. + */ + public static class NeuralNetworkConfigurationBuilder { + + /** + * Specify the processors. These are used at each layer for doing things like normalization and + * shaping of input. + * + * @param processor what to use to preProcess the data. + * @return builder pattern + */ + public NeuralNetworkConfigurationBuilder inputPreProcessor(Integer layer, + InputPreProcessor processor) { + inputPreProcessors.put(layer, processor); + return this; + } + + /** + * Specify additional layer configurations + */ + @Deprecated + public NeuralNetworkConfigurationBuilder layersFromArray(Layer[] arrLayers) { + for(Layer l : arrLayers) { + layers.add( l ); + } + return this; + } + } + + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/AbstractLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/AbstractLayerConfiguration.java index 951688e51..1ed923bda 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/AbstractLayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/AbstractLayerConfiguration.java @@ -24,12 +24,12 @@ package net.brutex.ai.dnn.conf.layer; import lombok.Getter; import lombok.NonNull; import lombok.Setter; -import net.brutex.ai.dnn.api.LayerConfiguration; -import org.deeplearning4j.nn.conf.inputs.InputType; +import lombok.experimental.SuperBuilder; +import net.brutex.ai.dnn.api.ILayerConfiguration; -public abstract class AbstractLayerConfiguration implements LayerConfiguration { +@SuperBuilder +public abstract class AbstractLayerConfiguration implements ILayerConfiguration { @Getter @Setter @NonNull - private InputType.Type inputType; - + private String name; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/DenseLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/DenseLayerConfiguration.java new file mode 100644 index 000000000..d472d99b2 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/DenseLayerConfiguration.java @@ -0,0 +1,62 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.conf.layer; + +import lombok.Builder; +import lombok.experimental.SuperBuilder; +import org.deeplearning4j.nn.conf.layers.LayerValidation; + +/** + * The dense layer is a neural network layer that is connected deeply, which means each neuron in + * the dense layer receives input from all neurons of its previous layer. The dense layer is found + * to be the most commonly used layer in the models. + *

+ * In the background, the dense layer performs a matrix-vector multiplication. The values used in + * the matrix are actually parameters that can be trained and updated with the help of + * backpropagation. + *
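Concretely, for in inputs and out units the layer computes y = f(W · x + b), where W is an out-by-in weight matrix, b a bias vector of length out, and f the activation function; that gives in * out + out trainable parameters (for example, in = 784 and out = 100 yields 784 * 100 + 100 = 78,500), matching what numParameters() in FeedForwardLayerConfiguration returns further below.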

+ * The output generated by the dense layer is an ‘m’ dimensional vector. Thus, dense layer is + * basically used for changing the dimensions of the vector. Dense layers also applies operations + * like rotation, scaling, translation on the vector. + */ +@SuperBuilder +public class DenseLayerConfiguration extends FeedForwardLayerConfiguration { + + /** + * Decides whether we should include a bias vector for calculation purposes or not. + */ + @Builder.Default + boolean bias = true; + + + + /** + * An implementation to validate the network + * + * @return true if no errors found; false otherwise + */ + @Override + public boolean isValid() { + LayerValidation.assertNInNOutSet("DenseLayerConfiguration", getName(), -99, getIn(), getOut()); + return super.isValid(); + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/FeedForwardLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/FeedForwardLayerConfiguration.java new file mode 100644 index 000000000..c86869d54 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/FeedForwardLayerConfiguration.java @@ -0,0 +1,99 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.conf.layer; + +import lombok.Getter; +import lombok.experimental.SuperBuilder; +import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.ILayer; +import net.brutex.ai.dnn.api.ILayerConfiguration; +import net.brutex.ai.dnn.api.IModel; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.inputs.InputType.Type; + +/** + * A Feed Forward Layer Configuration + */ +@Slf4j +@SuperBuilder +public class FeedForwardLayerConfiguration extends AbstractLayerConfiguration implements ILayerConfiguration { + + @Getter private int in; + @Getter private int out; + + /** + * This Fast Forward ILayer will always output data as + * FF type. + * @return InputType for FF + **/ + @Getter + final InputType.Type outputType = InputType.Type.FF; + + @Getter + final InputType.Type inputType = InputType.Type.FF; + + /** + * Create and return an instance of a ILayerConfiguration. 
+ * + * @param network the "holding" network for the instance + * @return the new layer instance + */ + //@Override + public ILayer instantiate(IModel network) { + //Let's do some verifications first + if(getInputType() != Type.FF) { + log.error("The {} layer configuration must use an InputType of {}, but found {}", + this.getClass().getSimpleName(), + Type.FF.name(), + getInputType().name()); + } + return null; + } + + /** + * Number of trainable parameter in this layer + * + * @return number of parameter + */ + @Override + public long numParameters() { + return in * out + out; //add one extra out for the bias + } + + /** + * An implementation should provide a method to validate the network + * + * @return true if no errors found; false otherwise + */ + @Override + public boolean isValid() { + boolean result = true; + if(getInputType() != Type.FF) { + log.error("The {} layer configuration must use an InputType of {}, but found {}", + this.getClass().getSimpleName(), + Type.FF.name(), + getInputType().name()); + result = false; + } + return result; + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/impl/network/AbstractNeuralNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/impl/network/AbstractNeuralNetwork.java deleted file mode 100644 index a1c36e988..000000000 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/impl/network/AbstractNeuralNetwork.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * - * ****************************************************************************** - * * - * * This program and the accompanying materials are made available under the - * * terms of the Apache License, Version 2.0 which is available at - * * https://www.apache.org/licenses/LICENSE-2.0. - * * - * * See the NOTICE file distributed with this work for additional - * * information regarding copyright ownership. - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * * License for the specific language governing permissions and limitations - * * under the License. - * * - * * SPDX-License-Identifier: Apache-2.0 - * ***************************************************************************** - * - */ - -package net.brutex.ai.dnn.impl.network; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import lombok.Getter; -import lombok.NonNull; -import lombok.Setter; -import net.brutex.ai.dnn.api.Layer; -import net.brutex.ai.dnn.api.NeuralNetwork; -import net.brutex.ai.dnn.api.LayerConfiguration; -import net.brutex.ai.dnn.conf.NeuralNetworkConfiguration; -import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.dataset.api.MultiDataSet; - -public abstract class AbstractNeuralNetwork implements NeuralNetwork { - - @Getter @Setter @NonNull - private String name; - - @Getter @NonNull - private NeuralNetworkConfiguration configuration; - - @Getter - private final Collection trainingListeners = new HashSet<>(); - - /** - * The neural network holds an instantiation of its configured - * layers. - * @return the actual runtime layers - */ - @Getter - private final List runtimeLayers = new ArrayList<>(); - - /** - * Sets the configuration to be used. Each time a configuration is set, the runtime layers - * of this NeuralNetwork are updated from the configuration. 
- * - * @param conf the configuration to use for this network - */ - public void setConfiguration(net.brutex.ai.dnn.api.NeuralNetworkConfiguration conf) { - List layers = conf.getLayerConfigurations(); - for(LayerConfiguration layer : layers) { - Layer initializedLayer = layer.instantiate(this); - this.getRuntimeLayers().add(initializedLayer); - } - this.configuration = configuration; - } - -} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/impl/network/NeuralNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/impl/network/NeuralNetwork.java deleted file mode 100644 index 198007baf..000000000 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/impl/network/NeuralNetwork.java +++ /dev/null @@ -1,692 +0,0 @@ -/* - * - * ****************************************************************************** - * * - * * This program and the accompanying materials are made available under the - * * terms of the Apache License, Version 2.0 which is available at - * * https://www.apache.org/licenses/LICENSE-2.0. - * * - * * See the NOTICE file distributed with this work for additional - * * information regarding copyright ownership. - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * * License for the specific language governing permissions and limitations - * * under the License. - * * - * * SPDX-License-Identifier: Apache-2.0 - * ***************************************************************************** - * - */ - -package net.brutex.ai.dnn.impl.network; - -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import lombok.Getter; -import lombok.NonNull; -import lombok.Setter; -import lombok.extern.slf4j.Slf4j; -import lombok.val; -import org.bytedeco.javacpp.Pointer; -import org.deeplearning4j.datasets.iterator.MultiDataSetWrapperIterator; -import org.deeplearning4j.exception.DL4JInvalidInputException; -import org.deeplearning4j.nn.api.Classifier; -import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.Updater; -import org.deeplearning4j.nn.api.layers.IOutputLayer; -import org.deeplearning4j.nn.api.layers.RecurrentLayer; -import org.deeplearning4j.nn.conf.BackpropType; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import net.brutex.ai.dnn.conf.NeuralNetworkConfiguration; -import org.deeplearning4j.nn.conf.WorkspaceMode; -import org.deeplearning4j.nn.layers.FrozenLayerWithBackprop; -import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.nn.updater.UpdaterCreator; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.optimize.Solver; -import org.deeplearning4j.optimize.api.ConvexOptimizer; -import org.deeplearning4j.optimize.api.TrainingListener; -import org.deeplearning4j.util.CrashReportingUtil; -import org.deeplearning4j.util.ModelSerializer; -import org.nd4j.common.base.Preconditions; -import org.nd4j.common.primitives.Pair; -import org.nd4j.linalg.api.memory.MemoryWorkspace; -import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration; -import 
org.nd4j.linalg.api.memory.enums.AllocationPolicy; -import org.nd4j.linalg.api.memory.enums.LearningPolicy; -import org.nd4j.linalg.api.memory.enums.ResetPolicy; -import org.nd4j.linalg.api.memory.enums.SpillPolicy; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.AsyncDataSetIterator; -import org.nd4j.linalg.dataset.DataSet; -import org.nd4j.linalg.dataset.api.MultiDataSet; -import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -import org.nd4j.linalg.exception.ND4JArraySizeException; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.heartbeat.Heartbeat; -import org.nd4j.linalg.heartbeat.reports.Environment; -import org.nd4j.linalg.heartbeat.reports.Event; -import org.nd4j.linalg.heartbeat.reports.Task; -import org.nd4j.linalg.heartbeat.utils.EnvironmentUtils; -import org.nd4j.linalg.heartbeat.utils.TaskUtils; -import org.nd4j.linalg.indexing.NDArrayIndex; - -@Slf4j -public class NeuralNetwork extends AbstractNeuralNetwork { - - - //the hidden neural network layers (including output layer) - protected Layer[] layers; - - protected transient ThreadLocal lastEtlTime = new ThreadLocal<>(); - - //Current training data: input features and labels - @Getter @Setter @NonNull - protected INDArray input; - @Getter @Setter - protected INDArray labels; - - //Workspaces for CUDNN. Pass to LayerWorkspaceMgr for re-use in cudnn helpers - @Getter - protected transient Map helperWorkspaces = new HashMap<>(); - - /** - * Used to call optimizers during backprop - */ - @NonNull - protected transient Solver solver = new Solver.Builder().configure(getConfiguration()). - listeners(getTrainingListeners()).model(this).build(); - - - /** - * Create a new NeuralNetwork from the given configuration - * @param conf - */ - public NeuralNetwork(NeuralNetworkConfiguration conf) { - if(! validateConfiguration() ) { - log.error("Configuration '{}' has failed validation.", conf.getName()); - throw new RuntimeException(); - } - log.info("Configuration '{}' has been validated successfully.", conf.getName()); - this.conf = conf; - } - - private boolean validateConfiguration() { - - return true; - } - - private void logNotImplemented( ) { - // getStackTrace() method return - // current method name at 0th index - String method = new Throwable() - .getStackTrace()[1] - .getMethodName(); - log.trace("Method '{}}' is not implemented for {}", method, this.getClass().getSimpleName()); - } - - /** - * This method does initialization of model - *

- * PLEASE NOTE: All implementations should track own state, to avoid double spending - */ - @Override - public void init() { - logNotImplemented(); - } - - /** - * This method returns model parameters as single INDArray - * - * @return - */ - @Override - public INDArray params() { - logNotImplemented(); - return null; - } - - /** - * This method returns updater state (if applicable), null otherwise - * - * @return - */ - @Override - public INDArray updaterState() { - return getUpdater(true) != null ? getUpdater(true).getStateViewArray() : null; - } - - /** - * This method returns Optimizer used for training - * - * @return the optimizer - */ - @Override - public ConvexOptimizer getOptimizer() { - return solver.getOptimizer(); - } - - - - /** Get the updater for this NeuralNetwork from the Solver - * @return Updater for NeuralNetwork - */ - private Updater getUpdater(boolean initializeIfReq) { - if (solver == null && initializeIfReq) { - synchronized(this){ - if(solver == null) { //May have been created while waiting for lock - solver = new Solver.Builder().configure(conf()).listeners(getTrainingListeners()).model(this).build(); - solver.getOptimizer().setUpdater(UpdaterCreator.getUpdater(this)); - } - } - } - if(solver != null) { - return solver.getOptimizer().getUpdater(initializeIfReq); - } - return null; - } - - /** - * Set the updater for the NeuralNetwork in the Solver - * */ - public void setUpdater(@NonNull Updater updater) { - solver.getOptimizer().setUpdater(updater); - } - - - @Override - public void fit(MultiDataSet dataSet) { - if (dataSet.getFeatures().length == 1 && dataSet.getLabels().length == 1) { - INDArray features = dataSet.getFeatures(0); - INDArray labels = dataSet.getLabels(0); - INDArray fMask = null; - INDArray lMask = null; - - if (dataSet.getFeaturesMaskArrays() != null) - fMask = dataSet.getFeaturesMaskArrays()[0]; - - if (dataSet.getFeaturesMaskArrays() != null) - lMask = dataSet.getLabelsMaskArrays()[0]; - - DataSet ds = new DataSet(features, labels, fMask, lMask); - fit(ds); - } else { - throw new DL4JInvalidInputException( - "MultiLayerNetwork can't handle MultiDataSet with more than 1 features or labels array." + - "Please consider use of ComputationGraph"); - } - } - - /** - * Perform minibatch training on all minibatches in the MultiDataSetIterator, for the specified number of epochs. - * Equvalent to calling {@link #fit(MultiDataSetIterator)} numEpochs times in a loop - * - * @param iterator Training data (DataSetIterator). Iterator must support resetting - * @param numEpochs Number of training epochs, >= 1 - */ - public void fit(@NonNull MultiDataSetIterator iterator, int numEpochs){ - Preconditions.checkArgument(numEpochs > 0, "Number of epochs much be > 0. Got numEpochs = %s", numEpochs); - Preconditions.checkArgument(numEpochs == 1 || iterator.resetSupported(), "Cannot perform multiple epochs training using" + - "iterator has does not support resetting (iterator.resetSupported() returned false)"); - - for(int i = 0; i < numEpochs; i++) { - fit(iterator); - } - } - - /** - * Perform minibatch training on all minibatches in the MultiDataSetIterator.
- * Note: The MultiDataSets in the MultiDataSetIterator must have exactly 1 input and output array (as - * MultiLayerNetwork only supports 1 input and 1 output) - * - * @param iterator Training data (DataSetIterator). Iterator must support resetting - */ - @Override - public void fit(MultiDataSetIterator iterator) { - fit(new MultiDataSetWrapperIterator(iterator)); - } - - /** - * Perform minibatch training on all minibatches in the DataSetIterator for 1 epoch.
- * Note that this method does not do layerwise pretraining.
- * For pretraining use method pretrain.. #pretrain(DataSetIterator)
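For orientation, a minimal usage sketch against the equivalent MultiLayerNetwork API that this class mirrors; the variables net (an initialized network) and trainData (any resettable DataSetIterator) are assumed for the example:

    import org.deeplearning4j.optimize.listeners.ScoreIterationListener;

    net.setListeners(new ScoreIterationListener(10)); // log the score every 10 iterations
    for (int epoch = 0; epoch < 3; epoch++) {
        net.fit(trainData);                           // one full pass over the iterator per call
    }
    // equivalent shortcut for several epochs: net.fit(trainData, 3);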
- * @param iterator Training data (DataSetIterator) - */ - @Override - public void fit(DataSetIterator iterator) { - try{ - fitHelper(iterator); - } catch (OutOfMemoryError e){ - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - private synchronized void fitHelper(DataSetIterator iterator){ - // we're wrapping all iterators into AsyncDataSetIterator to provide background prefetch - where appropriate - DataSetIterator iter; - boolean destructable = false; - if (iterator.asyncSupported()) { - iter = new AsyncDataSetIterator(iterator, Math.min( - Nd4j.getAffinityManager().getNumberOfDevices() * 2, 2), true); - destructable = true; - } else { - iter = iterator; - } - - for (TrainingListener tl : trainingListeners) { - tl.onEpochStart(this); - } - - LayerWorkspaceMgr workspaceMgr; - if(conf.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ - workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); - } else { - workspaceMgr = LayerWorkspaceMgr.builder() - .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - //Note for updater working memory, we have the option to re-use WS_ALL_LAYERS_ACT or FF/BP_WORKING_MEM - // as these should be closed by the time updaters are executed - //Generally, WS_ALL_LAYERS_ACT will be the larger of the two, so we'll use this - .with(ArrayType.UPDATER_WORKING_MEM, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .build(); - } - workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); - - update(TaskUtils.buildTask(iter)); - if (!iter.hasNext() && iter.resetSupported()) { - iter.reset(); - } - long time1 = System.currentTimeMillis(); - while (iter.hasNext()) { - - DataSet next = iter.next(); - long time2 = System.currentTimeMillis(); - - lastEtlTime.set((time2 - time1)); - - if (next.getFeatures() == null || next.getLabels() == null) - break; - - // TODO: basically we want to wrap internals of this loop into workspace - - - boolean hasMaskArrays = next.hasMaskArrays(); - - if (conf.getBackpropType() == BackpropType.TruncatedBPTT) { - doTruncatedBPTT(next.getFeatures(), next.getLabels(), next.getFeaturesMaskArray(), - next.getLabelsMaskArray(), workspaceMgr); - } else { - if (hasMaskArrays) - setLayerMaskArrays(next.getFeaturesMaskArray(), next.getLabelsMaskArray()); - - setInput(next.getFeatures()); - setLabels(next.getLabels()); - - if (solver == null) { - try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getTrainingListeners()).model(this) - .build(); - } - } - - //TODO CACHE - solver.optimize(workspaceMgr); - } - - if (hasMaskArrays) - clearLayerMaskArrays(); - - time1 = System.currentTimeMillis(); - synchronizeIterEpochCounts(); - } - - if (!trainingListeners.isEmpty()) { - for (TrainingListener tl : trainingListeners) { - tl.onEpochEnd(this); - } - } - - clearLayersStates(); - - if (destructable) - ((AsyncDataSetIterator) iter).shutdown(); - - incrementEpochCount(); - } - - - /** - * Workspace for working memory for a single layer: forward pass and backward pass - * Note that this is opened/closed once per op 
(activate/backpropGradient call) - */ - protected static final String WS_LAYER_WORKING_MEM = "WS_LAYER_WORKING_MEM"; - /** - * Workspace for storing all layers' activations - used only to store activations (layer inputs) as part of backprop - * Not used for inference - */ - protected static final String WS_ALL_LAYERS_ACT = "WS_ALL_LAYERS_ACT"; - /** - * Next 2 workspaces: used for: - * (a) Inference: holds activations for one layer only - * (b) Backprop: holds activation gradients for one layer only - * In both cases, they are opened and closed on every second layer - */ - protected static final String WS_LAYER_ACT_1 = "WS_LAYER_ACT_1"; - protected static final String WS_LAYER_ACT_2 = "WS_LAYER_ACT_2"; - - /** - * Workspace for output methods that use OutputAdapter - */ - protected static final String WS_OUTPUT_MEM = "WS_OUTPUT_MEM"; - - /** - * Workspace for working memory in RNNs - opened and closed once per RNN time step - */ - protected static final String WS_RNN_LOOP_WORKING_MEM = "WS_RNN_LOOP_WORKING_MEM"; - - - protected WorkspaceConfiguration WS_LAYER_WORKING_MEM_CONFIG; - - protected static final WorkspaceConfiguration WS_ALL_LAYERS_ACT_CONFIG = WorkspaceConfiguration.builder() - .initialSize(0) - .overallocationLimit(0.05) - .policyLearning(LearningPolicy.FIRST_LOOP) - .policyReset(ResetPolicy.BLOCK_LEFT) - .policySpill(SpillPolicy.REALLOCATE) - .policyAllocation(AllocationPolicy.OVERALLOCATE) - .build(); - - protected WorkspaceConfiguration WS_LAYER_ACT_X_CONFIG; - - protected static final WorkspaceConfiguration WS_RNN_LOOP_WORKING_MEM_CONFIG = WorkspaceConfiguration.builder() - .initialSize(0).overallocationLimit(0.05).policyReset(ResetPolicy.BLOCK_LEFT) - .policyAllocation(AllocationPolicy.OVERALLOCATE).policySpill(SpillPolicy.REALLOCATE) - .policyLearning(LearningPolicy.FIRST_LOOP).build(); - - - boolean initDone; - protected void update(Task task) { - if (!initDone) { - initDone = true; - Heartbeat heartbeat = Heartbeat.getInstance(); - task = ModelSerializer.taskByModel(this); - Environment env = EnvironmentUtils.buildEnvironment(); - heartbeat.reportEvent(Event.STANDALONE, env, task); - } - } - - protected void doTruncatedBPTT(INDArray input, INDArray labels, INDArray featuresMaskArray, - INDArray labelsMaskArray, LayerWorkspaceMgr workspaceMgr) { - if (input.rank() != 3 || labels.rank() != 3) { - log.warn("Cannot do truncated BPTT with non-3d inputs or labels. 
Expect input with shape [miniBatchSize,nIn,timeSeriesLength], got " - + Arrays.toString(input.shape()) + "\tand labels with shape " - + Arrays.toString(labels.shape())); - return; - } - if (input.size(2) != labels.size(2)) { - log.warn("Input and label time series have different lengths: {} input length, {} label length", - input.size(2), labels.size(2)); - return; - } - - int fwdLen = conf.getTbpttFwdLength(); - update(TaskUtils.buildTask(input, labels)); - val timeSeriesLength = input.size(2); - long nSubsets = timeSeriesLength / fwdLen; - if (timeSeriesLength % fwdLen != 0) - nSubsets++; //Example: 100 fwdLen with timeSeriesLength=120 -> want 2 subsets (1 of size 100, 1 of size 20) - - rnnClearPreviousState(); - - for (int i = 0; i < nSubsets; i++) { - long startTimeIdx = (long) i * fwdLen; - long endTimeIdx = startTimeIdx + fwdLen; - if (endTimeIdx > timeSeriesLength) - endTimeIdx = timeSeriesLength; - - if (startTimeIdx > Integer.MAX_VALUE || endTimeIdx > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - INDArray[] subsets = getSubsetsForTbptt((int) startTimeIdx, (int) endTimeIdx, input, labels, - featuresMaskArray, labelsMaskArray); - - setInput(subsets[0]); - setLabels(subsets[1]); - setLayerMaskArrays(subsets[2], subsets[3]); - - if (solver == null) { - try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getTrainingListeners()).model(this) - .build(); - } - } - solver.optimize(workspaceMgr); - - //Finally, update the state of the RNN layers: - updateRnnStateWithTBPTTState(); - } - - rnnClearPreviousState(); - clearLayerMaskArrays(); - } - - private INDArray[] getSubsetsForTbptt(int startTimeIdx, int endTimeIdx, INDArray input, INDArray labels, - INDArray fMask, INDArray lMask ){ - INDArray[] out = new INDArray[4]; - out[0] = input.get(NDArrayIndex.all(), NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); - out[1] = labels.get(NDArrayIndex.all(), NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); - - if (fMask != null) { - out[2] = fMask.get(NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); - } - if (lMask != null) { - out[3] = lMask.get(NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); - } - - return out; - } - - /** - * Intended for internal/developer use - */ - public void updateRnnStateWithTBPTTState() { - Layer[] layers = conf.calculateInnerLayers().toArray(new Layer[]{}); - for (int i = 0; i < layers.length; i++) { - if (layers[i] instanceof RecurrentLayer) { - RecurrentLayer l = ((RecurrentLayer) layers[i]); - l.rnnSetPreviousState(l.rnnGetTBPTTState()); - } else if (layers[i] instanceof MultiLayerNetwork) { - ((MultiLayerNetwork) layers[i]).updateRnnStateWithTBPTTState(); - } - } - } - - /** Clear the previous state of the RNN layers (if any). 
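For orientation, a minimal sketch of how truncated BPTT is usually switched on from the configuration side, using the upstream DL4J builder API that this class mirrors; the layer sizes and the forward/backward length of 100 are assumptions for illustration:

    import org.deeplearning4j.nn.conf.BackpropType;
    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.LSTM;
    import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .list()
            .layer(new LSTM.Builder().nIn(10).nOut(20).activation(Activation.TANH).build())
            .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .nIn(20).nOut(5).activation(Activation.SOFTMAX).build())
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(100)   // the fwdLen consumed by doTruncatedBPTT above
            .tBPTTBackwardLength(100)
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    // net.fit(trainData); // a series of length 120 is then split into subsets of 100 and 20 steps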
- */ - public void rnnClearPreviousState() { - Layer[] layers = conf.getLayers().toArray(new Layer[]{}); - if (layers == null) - return; - for (int i = 0; i < layers.length; i++) { - if (layers[i] instanceof RecurrentLayer) - ((RecurrentLayer) layers[i]).rnnClearPreviousState(); - else if (layers[i] instanceof MultiLayerNetwork) { - ((MultiLayerNetwork) layers[i]).rnnClearPreviousState(); - } else if(layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer)layers[i]).getUnderlying() instanceof RecurrentLayer){ - ((RecurrentLayer) ((BaseWrapperLayer)layers[i]).getUnderlying()).rnnClearPreviousState(); - } - } - } - - - - /** Remove the mask arrays from all layers.
- * See {@link #setLayerMaskArrays(INDArray, INDArray)} for details on mask arrays. - */ - public void clearLayerMaskArrays() { - Layer[] layers = conf.getLayers().toArray(new Layer[]{}); - for (Layer layer : layers) { - layer.setMaskArray(null); - } - } - - /** - * Increment the epoch count (in the underlying {@link MultiLayerConfiguration} by 1). - * Note that this is done automatically when using iterator-based fitting methods, such as - * {@link #fit(DataSetIterator)}. However, when using non-iterator fit methods (DataSet, INDArray/INDArray etc), - * the network has no way to know when one epoch ends and another starts. In such situations, this method - * can be used to increment the epoch counter.
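A short sketch of the manual bookkeeping described here, assuming an initialized MultiLayerNetwork net and a java.util.List of DataSet minibatches named batches (both hypothetical), with getLayerWiseConfigurations() as the upstream accessor for the underlying configuration:

    for (int epoch = 0; epoch < 5; epoch++) {
        for (org.nd4j.linalg.dataset.DataSet batch : batches) {
            net.fit(batch);            // fit(DataSet) cannot detect epoch boundaries on its own
        }
        net.incrementEpochCount();     // keep epoch-based learning rate schedules in sync
    }
    int epochsSoFar = net.getLayerWiseConfigurations().getEpochCount();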
- * Note that the epoch counter is used for situations such as some learning rate schedules, and the like. - * - * The current epoch count can be obtained using {@code MultiLayerConfiguration.getLayerwiseConfiguration().getEpochCount()} - */ - public void incrementEpochCount(){ - conf.setEpochCount(conf.getEpochCount() + 1); - synchronizeIterEpochCounts(); - } - - protected void synchronizeIterEpochCounts() { - //TODO: this is necessary for some schedules - but the redundant values are a little ugly... - int currIter = conf.getIterationCount(); - int currEpoch = conf.getEpochCount(); - log.error("Something went wrong here. Code incomplete"); - /*for(Layer l : conf.getLayers()) { - l.setIterationCount(currIter); - l.setEpochCount(currEpoch); - } - */ - } - - /** - * This method just makes sure there's no state preserved within layers - */ - public void clearLayersStates() { - for (Layer layer : layers) { - layer.clear(); - layer.clearNoiseWeightParams(); - } - } - - - /**Set the mask arrays for features and labels. Mask arrays are typically used in situations such as one-to-many - * and many-to-one learning with recurrent neural networks, as well as for supporting time series of varying lengths - * within the same minibatch.
- * For example, with RNN data sets with input of shape [miniBatchSize,nIn,timeSeriesLength] and outputs of shape - * [miniBatchSize,nOut,timeSeriesLength], the features and mask arrays will have shape [miniBatchSize,timeSeriesLength] - * and contain values 0 or 1 at each element (to specify whether a given input/example is present - or merely padding - - * at a given time step).
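As an illustration of the [miniBatchSize, timeSeriesLength] masks described above, the sketch below marks the last two steps of the second example as padding before fitting; net is an assumed, already initialized recurrent network and all shapes are invented for the example:

    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;

    INDArray features     = Nd4j.rand(new int[]{2, 10, 5}); // [miniBatchSize, nIn, timeSeriesLength]
    INDArray labels       = Nd4j.rand(new int[]{2, 3, 5});  // [miniBatchSize, nOut, timeSeriesLength]
    INDArray featuresMask = Nd4j.ones(2, 5);                // [miniBatchSize, timeSeriesLength]
    INDArray labelsMask   = Nd4j.ones(2, 5);
    featuresMask.putScalar(new int[]{1, 3}, 0.0);           // second example has only 3 real steps
    featuresMask.putScalar(new int[]{1, 4}, 0.0);
    labelsMask.putScalar(new int[]{1, 3}, 0.0);
    labelsMask.putScalar(new int[]{1, 4}, 0.0);

    net.setLayerMaskArrays(featuresMask, labelsMask);
    net.fit(features, labels);
    net.clearLayerMaskArrays();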
- * NOTE: This method is not usually used directly. Instead, methods such as @link #feedForward(INDArray, INDArray, INDArray)} - * and @link #output(INDArray, boolean, INDArray, INDArray)} handle setting of masking internally. - * @param featuresMaskArray Mask array for features (input) - * @param labelsMaskArray Mask array for labels (output) - * @see #clearLayerMaskArrays() - */ - public void setLayerMaskArrays(INDArray featuresMaskArray, INDArray labelsMaskArray) { - if (featuresMaskArray != null) { - - if (featuresMaskArray.size(0) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - //New approach: use feedForwardMaskArray method - feedForwardMaskArray(featuresMaskArray, MaskState.Active, (int) featuresMaskArray.size(0)); - - - /* - //feedforward layers below a RNN layer: need the input (features) mask array - //Reason: even if the time series input is zero padded, the output from the dense layers are - // non-zero (i.e., activationFunction(0*weights + bias) != 0 in general) - //This assumes that the time series input is masked - i.e., values are 0 at the padded time steps, - // so we don't need to do anything for the recurrent layer - - //Now, if mask array is 2d -> need to reshape to 1d (column vector) in the exact same order - // as is done for 3d -> 2d time series reshaping - INDArray reshapedFeaturesMask = TimeSeriesUtils.reshapeTimeSeriesMaskToVector(featuresMaskArray); - - for( int i=0; i feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, - int minibatchSize) { - if (maskArray == null) { - for (int i = 0; i < layers.length; i++) { - layers[i].feedForwardMaskArray(null, null, minibatchSize); - } - } else { - //Do a forward pass through each preprocessor and layer - for (int i = 0; i < layers.length; i++) { - InputPreProcessor preProcessor = conf.getInputPreProcessors().get(i); - - if (preProcessor != null) { - Pair p = - preProcessor.feedForwardMaskArray(maskArray, currentMaskState, minibatchSize); - if (p != null) { - maskArray = p.getFirst(); - currentMaskState = p.getSecond(); - } else { - maskArray = null; - currentMaskState = null; - } - } - - Pair p = - layers[i].feedForwardMaskArray(maskArray, currentMaskState, minibatchSize); - if (p != null) { - maskArray = p.getFirst(); - currentMaskState = p.getSecond(); - } else { - maskArray = null; - currentMaskState = null; - } - } - } - - return new Pair<>(maskArray, currentMaskState); - } - - -} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java new file mode 100644 index 000000000..0a605b94f --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java @@ -0,0 +1,53 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.networks; + +import lombok.Getter; +import lombok.Setter; +import net.brutex.ai.dnn.conf.NeuralNetworkConfiguration; +import net.brutex.ai.dnn.api.INeuralNetwork; + +/** + * Artificial Neural Network An artificial neural network (1) takes some input data, and (2) + * transforms this input data by calculating a weighted sum over the inputs and (3) applies a + * non-linear function to this transformation to calculate an intermediate state. The three steps + * above constitute what is known as a layer, and the transformative function is often referred to + * as a unit. The intermediate states—often termed features—are used as the input into another + * layer. + *

+ * Through repetition of these steps, the artificial neural network learns multiple layers of + * non-linear features, which it then combines in a final layer to create a prediction. + *
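As a worked illustration of steps (1) to (3), a single layer's transformation can be written directly with ND4J; the shapes and the tanh non-linearity are arbitrary choices for this sketch:

    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;
    import org.nd4j.linalg.ops.transforms.Transforms;

    INDArray x = Nd4j.rand(1, 4);            // (1) input data: one example with 4 features
    INDArray W = Nd4j.rand(4, 3);            // weights of a layer with 3 units
    INDArray b = Nd4j.zeros(1, 3);           // biases
    INDArray z = x.mmul(W).addRowVector(b);  // (2) weighted sum over the inputs
    INDArray a = Transforms.tanh(z);         // (3) non-linearity -> intermediate state (features)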

+ * The neural network learns by generating an error signal that measures the difference between the + * predictions of the network and the desired values and then using this error signal to change the + * weights (or parameters) so that predictions get more accurate. + */ +public abstract class ArtificialNeuralNetwork implements INeuralNetwork { + + /** + * A neural network is created from a configuration. + * @param conf The (new net.brutex.ai) configuration for the network + */ + @Getter + @Setter //TODO make this also final and @NonNull + private NeuralNetworkConfiguration configuration; +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java index a39a08d97..4d6ff7675 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java @@ -346,7 +346,7 @@ public abstract class BaseEarlyStoppingTrainer implements IEarl } else if(model instanceof ComputationGraph){ ComputationGraph cg = ((ComputationGraph) model); listeners = cg.getListeners(); - cg.getConfiguration().setEpochCount(epochNum); + cg.getComputationGraphConfiguration().setEpochCount(epochNum); } else { return; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java index 121102214..696e92bc2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java @@ -431,7 +431,7 @@ public class GradientCheckUtil { + "DataTypeUtil.setDTypeForContext(DataType.DOUBLE); before using GradientCheckUtil"); } - DataType netDataType = c.net.getConfiguration().getDataType(); + DataType netDataType = c.net.getComputationGraphConfiguration().getDataType(); if (netDataType != DataType.DOUBLE) { throw new IllegalStateException("Cannot perform gradient check: Network datatype is not set to double precision (" + "is: " + netDataType + "). Double precision must be used for gradient checks. Create network with .dataType(DataType.DOUBLE) before using GradientCheckUtil"); @@ -444,8 +444,8 @@ public class GradientCheckUtil { //Check configuration int layerCount = 0; - for (String vertexName : c.net.getConfiguration().getVertices().keySet()) { - GraphVertex gv = c.net.getConfiguration().getVertices().get(vertexName); + for (String vertexName : c.net.getComputationGraphConfiguration().getVertices().keySet()) { + GraphVertex gv = c.net.getComputationGraphConfiguration().getVertices().get(vertexName); if (!(gv instanceof LayerVertex)) continue; LayerVertex lv = (LayerVertex) gv; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java index 60780ab99..e7500055f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java @@ -32,194 +32,209 @@ import org.nd4j.common.primitives.Pair; import java.io.Serializable; import java.util.Collection; +/** + * A layer is the highest-level building block in deep learning. 
A layer is a container that usually
+ * receives weighted input, transforms it with a set of mostly non-linear functions and then passes
+ * these values as output to the next layer. A layer is usually uniform, that is, it contains only
+ * one type of activation function, pooling, convolution, etc., so that it can be easily compared to
+ * other parts of the network. The first and last layers in a network are called input and output
+ * layers, respectively, and all layers in between are called hidden layers.
+ *
+ * @see NVIDIA Deep Learning In A Nutshell
+ */
 public interface Layer extends Serializable, Cloneable, Model, Trainable {
- enum Type {
- FEED_FORWARD, RECURRENT, CONVOLUTIONAL, CONVOLUTIONAL3D,
- SUBSAMPLING, UPSAMPLING, RECURSIVE, MULTILAYER, NORMALIZATION
- }
+ /**
+ * This method sets the given CacheMode for the current layer.
+ *
+ * @param mode the cache mode to use
+ */
+ void setCacheMode(CacheMode mode);
- enum TrainingMode {
- TRAIN, TEST
- }
+ /**
+ * Calculate the regularization component of the score, for the parameters in this layer<br>
For + * example, the L1, L2 and/or weight decay components of the loss function
+ *
+ * @param backpropOnlyParams If true: calculate regularization score based on backprop params
+ * only. If false: calculate based on all params (including pretrain
+ * params, if any)
+ * @return the regularization score for this layer's parameters
+ */
+ double calcRegularizationScore(boolean backpropOnlyParams);
- /**
- * This method sets given CacheMode for current layer
- *
- * @param mode
- */
- void setCacheMode(CacheMode mode);
+ /**
+ * Returns the layer type
+ *
+ * @return the layer type
+ */
+ Type type();
- /**
- * Calculate the regularization component of the score, for the parameters in this layer<br>
- * For example, the L1, L2 and/or weight decay components of the loss function
- * - * @param backpropOnlyParams If true: calculate regularization score based on backprop params only. If false: calculate - * based on all params (including pretrain params, if any) - * @return the regularization score of - */ - double calcRegularizationScore(boolean backpropOnlyParams); + /** + * Calculate the gradient relative to the error in the next layer + * + * @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where + * C is cost function a=sigma(z) is activation. + * @param workspaceMgr Workspace manager + * @return Pair where Gradient is gradient for this layer, INDArray is + * epsilon (activation gradient) needed by next layer, but before element-wise multiply by + * sigmaPrime(z). So for standard feed-forward layer, if this layer is L, then return.getSecond() + * == dL/dIn = (w^(L)*(delta^(L))^T)^T. Note that the returned array should be placed in the + * {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATION_GRAD} workspace via the workspace + * manager + */ + Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr); - /** - * Returns the layer type - * - * @return - */ - Type type(); + /** + * Perform forward pass and return the activations array with the last set input + * + * @param training training or test mode + * @param workspaceMgr Workspace manager + * @return the activation (layer output) of the last specified input. Note that the returned array + * should be placed in the {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATIONS} workspace + * via the workspace manager + */ + INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr); + + /** + * Perform forward pass and return the activations array with the specified input + * + * @param input the input to use + * @param training train or test mode + * @param mgr Workspace manager. + * @return Activations array. Note that the returned array should be placed in the + * {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATIONS} workspace via the workspace + * manager + */ + INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr mgr); + + /** + * Get the iteration listeners for this layer. + */ + Collection getListeners(); + + /** + * Set the {@link TrainingListener}s for this model. If any listeners have previously been set, + * they will be replaced by this method + */ + void setListeners(TrainingListener... listeners); + + /** + * Set the {@link TrainingListener}s for this model. If any listeners have previously been set, + * they will be replaced by this method + */ + void setListeners(Collection listeners); + + /** + * Get the layer index. + */ + int getIndex(); + + /** + * Set the layer index. + */ + void setIndex(int index); + + /** + * @return The current iteration count (number of parameter updates) for the layer/network + */ + int getIterationCount(); + + /** + * Set the current iteration count (number of parameter updates) for the layer/network + */ + void setIterationCount(int iterationCount); + + /** + * @return The current epoch count (number of training epochs passed) for the layer/network + */ + int getEpochCount(); + + /** + * Set the current epoch count (number of epochs passed ) for the layer/network + */ + void setEpochCount(int epochCount); + + /** + * Set the layer input. 
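To make the activate(...) contract above concrete, a minimal inference-mode sketch; net is an assumed, already initialized MultiLayerNetwork whose first layer expects 4 inputs:

    import java.util.Arrays;
    import org.deeplearning4j.nn.api.Layer;
    import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;

    Layer first = net.getLayer(0);
    INDArray x = Nd4j.rand(3, 4);                                              // minibatch of 3 examples
    INDArray act = first.activate(x, false, LayerWorkspaceMgr.noWorkspaces()); // training == false
    System.out.println("activations: " + Arrays.toString(act.shape()));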
+ */ + void setInput(INDArray input, LayerWorkspaceMgr workspaceMgr); + + /** + * Get current/last input mini-batch size, as set by setInputMiniBatchSize(int) + * + * @see Layer#setInputMiniBatchSize(int) + */ + int getInputMiniBatchSize(); + + /** + * Set current/last input mini-batch size.
Used for score and gradient calculations. Mini + * batch size may be different from getInput().size(0) due to reshaping operations - for example, + * when using RNNs with DenseLayerConfiguration and OutputLayer. Called automatically during + * forward pass. + */ + void setInputMiniBatchSize(int size); + + INDArray getMaskArray(); + + /** + * Set the mask array. Note: In general, {@link #feedForwardMaskArray(INDArray, MaskState, int)} + * should be used in preference to this. + * + * @param maskArray Mask array to set + */ + void setMaskArray(INDArray maskArray); + + /** + * Returns true if the layer can be trained in an unsupervised/pretrain manner (AE, VAE, etc) + * + * @return true if the layer can be pretrained (using fit(INDArray), false otherwise + */ + boolean isPretrainLayer(); + + void clearNoiseWeightParams(); + + /** + * A performance optimization: mark whether the layer is allowed to modify its input array + * in-place. In many cases, this is totally safe - in others, the input array will be shared by + * multiple layers, and hence it's not safe to modify the input array. This is usually used by ops + * such as dropout. + * + * @param allow If true: the input array is safe to modify. If false: the input array should be + * copied before it is modified (i.e., in-place modifications are un-safe) + */ + void allowInputModification(boolean allow); + + /** + * Feed forward the input mask array, setting in the layer as appropriate. This allows different + * layers to handle masks differently - for example, bidirectional RNNs and normal RNNs operate + * differently with masks (the former sets activations to 0 outside of the data present region + * (and keeps the mask active for future layers like dense layers), whereas normal RNNs don't zero + * out the activations/errors )instead relying on backpropagated error arrays to handle the + * variable length case.
This is also used for example for networks that contain global + * pooling layers, arbitrary preprocessors, etc. + * + * @param maskArray Mask array to set + * @param currentMaskState Current state of the mask - see {@link MaskState} + * @param minibatchSize Current minibatch size. Needs to be known as it cannot always be + * inferred from the activations array due to reshaping (such as a + * DenseLayerConfiguration within a recurrent neural network) + * @return New mask array after this layer, along with the new mask state. + */ + Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, + int minibatchSize); + + /** + * @return Get the layer helper, if any + */ + LayerHelper getHelper(); - /** - * Calculate the gradient relative to the error in the next layer - * - * @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where C - * is cost function a=sigma(z) is activation. - * @param workspaceMgr Workspace manager - * @return Pair where Gradient is gradient for this layer, INDArray is epsilon (activation gradient) - * needed by next layer, but before element-wise multiply by sigmaPrime(z). So for standard feed-forward layer, if this layer is - * L, then return.getSecond() == dL/dIn = (w^(L)*(delta^(L))^T)^T. Note that the returned array should be placed in the - * {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATION_GRAD} workspace via the workspace manager - */ - Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr); + enum Type { + FEED_FORWARD, RECURRENT, CONVOLUTIONAL, CONVOLUTIONAL3D, + SUBSAMPLING, UPSAMPLING, RECURSIVE, MULTILAYER, NORMALIZATION + } - - /** - * Perform forward pass and return the activations array with the last set input - * - * @param training training or test mode - * @param workspaceMgr Workspace manager - * @return the activation (layer output) of the last specified input. Note that the returned array should be placed - * in the {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATIONS} workspace via the workspace manager - */ - INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr); - - /** - * Perform forward pass and return the activations array with the specified input - * - * @param input the input to use - * @param training train or test mode - * @param mgr Workspace manager. - * @return Activations array. Note that the returned array should be placed in the - * {@link org.deeplearning4j.nn.workspace.ArrayType#ACTIVATIONS} workspace via the workspace manager - */ - INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr mgr); - - /** - * Get the iteration listeners for this layer. - */ - Collection getListeners(); - - /** - * Set the {@link TrainingListener}s for this model. If any listeners have previously been set, they will be - * replaced by this method - */ - void setListeners(TrainingListener... listeners); - - /** - * Set the {@link TrainingListener}s for this model. If any listeners have previously been set, they will be - * replaced by this method - */ - void setListeners(Collection listeners); - - /** - * Set the layer index. - */ - void setIndex(int index); - - /** - * Get the layer index. 
- */ - int getIndex(); - - /** - * @return The current iteration count (number of parameter updates) for the layer/network - */ - int getIterationCount(); - - /** - * @return The current epoch count (number of training epochs passed) for the layer/network - */ - int getEpochCount(); - - /** - * Set the current iteration count (number of parameter updates) for the layer/network - */ - void setIterationCount(int iterationCount); - - /** - * Set the current epoch count (number of epochs passed ) for the layer/network - */ - void setEpochCount(int epochCount); - - /** - * Set the layer input. - */ - void setInput(INDArray input, LayerWorkspaceMgr workspaceMgr); - - /** - * Set current/last input mini-batch size.
- * Used for score and gradient calculations. Mini batch size may be different from - * getInput().size(0) due to reshaping operations - for example, when using RNNs with - * DenseLayer and OutputLayer. Called automatically during forward pass. - */ - void setInputMiniBatchSize(int size); - - /** - * Get current/last input mini-batch size, as set by setInputMiniBatchSize(int) - * - * @see Layer#setInputMiniBatchSize(int) - */ - int getInputMiniBatchSize(); - - /** - * Set the mask array. Note: In general, {@link #feedForwardMaskArray(INDArray, MaskState, int)} should be used in - * preference to this. - * - * @param maskArray Mask array to set - */ - void setMaskArray(INDArray maskArray); - - - INDArray getMaskArray(); - - /** - * Returns true if the layer can be trained in an unsupervised/pretrain manner (AE, VAE, etc) - * - * @return true if the layer can be pretrained (using fit(INDArray), false otherwise - */ - boolean isPretrainLayer(); - - - void clearNoiseWeightParams(); - - /** - * A performance optimization: mark whether the layer is allowed to modify its input array in-place. In many cases, - * this is totally safe - in others, the input array will be shared by multiple layers, and hence it's not safe to - * modify the input array. - * This is usually used by ops such as dropout. - * @param allow If true: the input array is safe to modify. If false: the input array should be copied before it - * is modified (i.e., in-place modifications are un-safe) - */ - void allowInputModification(boolean allow); - - - /** - * Feed forward the input mask array, setting in the layer as appropriate. This allows different layers to - * handle masks differently - for example, bidirectional RNNs and normal RNNs operate differently with masks (the - * former sets activations to 0 outside of the data present region (and keeps the mask active for future layers like - * dense layers), whereas normal RNNs don't zero out the activations/errors )instead relying on backpropagated error - * arrays to handle the variable length case.
- * This is also used for example for networks that contain global pooling layers, arbitrary preprocessors, etc. - * - * @param maskArray Mask array to set - * @param currentMaskState Current state of the mask - see {@link MaskState} - * @param minibatchSize Current minibatch size. Needs to be known as it cannot always be inferred from the activations - * array due to reshaping (such as a DenseLayer within a recurrent neural network) - * @return New mask array after this layer, along with the new mask state. - */ - Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize); - - /** - * @return Get the layer helper, if any - */ - LayerHelper getHelper(); + enum TrainingMode { + TRAIN, TEST + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ModelAdapter.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ModelAdapter.java index 8b7d816d6..01a60b73e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ModelAdapter.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ModelAdapter.java @@ -25,7 +25,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; public interface ModelAdapter extends OutputAdapter { /** - * This method invokes model internally, and does convertion to T + * This method invokes model internally, and does conversion to T * @return */ T apply(Model model, INDArray[] inputs, INDArray[] inputMasks, INDArray[] labelsMasks); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java index 7170953e9..7b6483483 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java @@ -41,7 +41,7 @@ public interface ParamInitializer { /** * Get a list of all parameter keys given the layer configuration * - * @param layer Layer + * @param layer ILayer * @return All parameter keys */ List paramKeys(org.deeplearning4j.nn.conf.layers.Layer layer); @@ -49,7 +49,7 @@ public interface ParamInitializer { /** * Weight parameter keys given the layer configuration * - * @param layer Layer + * @param layer ILayer * @return Weight parameter keys */ List weightKeys(org.deeplearning4j.nn.conf.layers.Layer layer); @@ -57,7 +57,7 @@ public interface ParamInitializer { /** * Bias parameter keys given the layer configuration * - * @param layer Layer + * @param layer ILayer * @return Bias parameter keys */ List biasKeys(org.deeplearning4j.nn.conf.layers.Layer layer); @@ -65,7 +65,7 @@ public interface ParamInitializer { /** * Is the specified parameter a weight? * - * @param layer Layer + * @param layer ILayer * @param key Key to check * @return True if parameter is a weight */ @@ -74,7 +74,7 @@ public interface ParamInitializer { /** * Is the specified parameter a bias? 
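A small usage sketch of these parameter-key queries, assuming the upstream DL4J API in which a layer configuration exposes its ParamInitializer via initializer(); the configuration class may be renamed (for example to DenseLayerConfiguration) in this refactor:

    import java.util.List;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;

    DenseLayer dense = new DenseLayer.Builder().nIn(4).nOut(3).build();
    List<String> keys = dense.initializer().paramKeys(dense);          // typically [W, b]
    boolean isBias    = dense.initializer().isBiasParam(dense, "b");   // true
    boolean isWeight  = dense.initializer().isWeightParam(dense, "W"); // true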
* - * @param layer Layer + * @param layer ILayer * @param key Key to check * @return True if parameter is a bias */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/TrainingConfig.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/TrainingConfig.java index ae7601a6f..58f101260 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/TrainingConfig.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/TrainingConfig.java @@ -47,7 +47,7 @@ public interface TrainingConfig { * Is the specified parameter a layerwise pretraining only parameter?
* For example, visible bias params in an autoencoder (or, decoder params in a variational autoencoder) aren't * used during supervised backprop.
- * Layers (like DenseLayer, etc) with no pretrainable parameters will return false for all (valid) inputs. + * Layers (like DenseLayerConfiguration, etc) with no pretrainable parameters will return false for all (valid) inputs. * * @param paramName Parameter name/key * @return True if the parameter is for layerwise pretraining only, false otherwise diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java index d63b57bb8..2c01298cb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java @@ -36,7 +36,7 @@ public interface Updater extends Serializable { /** * Set the internal (historical) state view array for this updater * - * @param layer Layer that this updater belongs to + * @param layer ILayer that this updater belongs to * @param viewArray View array * @param initialize Whether to initialize the array or not */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java index fff8bd77d..cfa82b050 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java @@ -33,7 +33,7 @@ public interface LayerConstraint extends Cloneable, Serializable { * Apply a given constraint to a layer at each iteration * in the provided epoch, after parameters have been updated. * - * @param layer org.deeplearning4j.nn.api.Layer + * @param layer org.deeplearning4j.nn.api.ILayer * @param iteration given iteration as integer * @param epoch current epoch as integer */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java index 62050b88e..a4f73d3b0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java @@ -66,10 +66,10 @@ public interface RecurrentLayer extends Layer { * (a) result in the same output
* (b) leave the state maps (both stateMap and tBpttStateMap) in an identical state * - * @param input Layer input + * @param input ILayer input * @param training if true: training. Otherwise: test * @param storeLastForTBPTT If true: store the final state in tBpttStateMap for use in truncated BPTT training - * @return Layer activations + * @return ILayer activations */ INDArray rnnActivateUsingStoredState(INDArray input, boolean training, boolean storeLastForTBPTT, LayerWorkspaceMgr workspaceMg); @@ -92,7 +92,7 @@ public interface RecurrentLayer extends Layer { void rnnSetTBPTTState(Map state); /** - * Truncated BPTT equivalent of Layer.backpropGradient(). + * Truncated BPTT equivalent of ILayer.backpropGradient(). * Primary difference here is that forward pass in the context of BPTT is that we do * forward pass using stored state for truncated BPTT vs. from zero initialization * for standard BPTT. diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index 69ff898e2..f44a8f3ab 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -25,6 +25,7 @@ import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.INeuralNetworkConfiguration; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.distribution.Distribution; @@ -68,7 +69,9 @@ import java.util.*; @NoArgsConstructor @Slf4j @EqualsAndHashCode(exclude = {"iterationCount", "epochCount"}) -public class NeuralNetConfiguration implements Serializable, Cloneable { +public class NeuralNetConfiguration implements Serializable, Cloneable, + INeuralNetworkConfiguration { + protected Layer layer; //batch size: primarily used for conv nets. Will be reinforced if set. diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java index 43fdc4254..a38e6dfcf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java @@ -43,7 +43,7 @@ public class MaxNormConstraint extends BaseConstraint { /** * @param maxNorm Maximum L2 value * @param paramNames Which parameter names to apply constraint to - * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * @param dimensions Dimensions to apply to. For DenseLayerConfiguration, OutputLayer, RnnOutputLayer, LSTM, etc: this should * be dimension 1. For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ @@ -56,7 +56,7 @@ public class MaxNormConstraint extends BaseConstraint { * Apply to weights but not biases by default * * @param maxNorm Maximum L2 value - * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * @param dimensions Dimensions to apply to. For DenseLayerConfiguration, OutputLayer, RnnOutputLayer, LSTM, etc: this should * be dimension 1. 
For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java index 6449a9abd..ca43d4ca0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java @@ -51,7 +51,7 @@ public class MinMaxNormConstraint extends BaseConstraint { * * @param max Maximum L2 value * @param min Minimum L2 value - * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * @param dimensions Dimensions to apply to. For DenseLayerConfiguration, OutputLayer, RnnOutputLayer, LSTM, etc: this should * be dimension 1. For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ @@ -65,7 +65,7 @@ public class MinMaxNormConstraint extends BaseConstraint { * @param max Maximum L2 value * @param min Minimum L2 value * @param rate Constraint rate - * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * @param dimensions Dimensions to apply to. For DenseLayerConfiguration, OutputLayer, RnnOutputLayer, LSTM, etc: this should * be dimension 1. For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ @@ -79,7 +79,7 @@ public class MinMaxNormConstraint extends BaseConstraint { * @param min Minimum L2 value * @param rate Constraint rate * @param paramNames Which parameter names to apply constraint to - * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * @param dimensions Dimensions to apply to. For DenseLayerConfiguration, OutputLayer, RnnOutputLayer, LSTM, etc: this should * be dimension 1. For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java index a082056a7..3e80f341b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java @@ -39,7 +39,7 @@ public class UnitNormConstraint extends BaseConstraint { /** * Apply to weights but not biases by default * - * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * @param dimensions Dimensions to apply to. For DenseLayerConfiguration, OutputLayer, RnnOutputLayer, LSTM, etc: this should * be dimension 1. For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ @@ -49,7 +49,7 @@ public class UnitNormConstraint extends BaseConstraint { /** - * @param dimensions Dimensions to apply to. For DenseLayer, OutputLayer, RnnOutputLayer, LSTM, etc: this should + * @param dimensions Dimensions to apply to. 
For DenseLayerConfiguration, OutputLayer, RnnOutputLayer, LSTM, etc: this should * be dimension 1. For CNNs, this should be dimensions [1,2,3] corresponding to last 3 of * parameters which have order [depthOut, depthIn, kH, kW] */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java index b1734682d..0c7565db1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java @@ -21,7 +21,6 @@ package org.deeplearning4j.nn.conf.graph; import lombok.Data; -import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -40,8 +39,8 @@ public class LayerVertex extends GraphVertex { private NeuralNetConfiguration layerConf; private InputPreProcessor preProcessor; - //Set outputVertex to true when Layer is an OutputLayer, OR For use in specialized situations like reinforcement learning - // For RL situations, this Layer insn't an OutputLayer, but is the last layer in a graph, that gets its error/epsilon + //Set outputVertex to true when ILayer is an OutputLayer, OR For use in specialized situations like reinforcement learning + // For RL situations, this ILayer insn't an OutputLayer, but is the last layer in a graph, that gets its error/epsilon // passed in externally private boolean outputVertex; @@ -99,7 +98,7 @@ public class LayerVertex extends GraphVertex { public org.deeplearning4j.nn.graph.vertex.GraphVertex instantiate(ComputationGraph graph, String name, int idx, INDArray paramsView, boolean initializeParams, DataType networkDatatype) { //Now, we need to work out if this vertex is an output vertex or not... - boolean isOutput = graph.getConfiguration().getNetworkOutputs().contains(name); + boolean isOutput = graph.getComputationGraphConfiguration().getNetworkOutputs().contains(name); org.deeplearning4j.nn.api.Layer layer = layerConf.getLayer().instantiate(layerConf, null, idx, paramsView, initializeParams, networkDatatype); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java index 0fb559c74..0b10cedd4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java @@ -134,7 +134,7 @@ public class ActivationLayer extends NoParamLayer { private IActivation activationFn = null; /** - * Layer activation function. Typical values include:
"relu" (rectified linear), "tanh", "sigmoid", + * ILayer activation function. Typical values include:
"relu" (rectified linear), "tanh", "sigmoid", * "softmax", "hardtanh", "leakyrelu", "maxout", "softsign", "softplus" * * @deprecated Use {@link #activation(Activation)} or {@link @activation(IActivation)} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java index fc751e91b..6aad5b0ef 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java @@ -176,7 +176,7 @@ public abstract class BaseLayer extends Layer implements Serializable, Cloneable protected double biasInit = Double.NaN; /** - * Gain initialization value, for layers with Layer Normalization. Defaults to 1 + * Gain initialization value, for layers with ILayer Normalization. Defaults to 1 * */ protected double gainInit = Double.NaN; @@ -292,7 +292,7 @@ public abstract class BaseLayer extends Layer implements Serializable, Cloneable } /** - * Gain initialization value, for layers with Layer Normalization. Defaults to 1 + * Gain initialization value, for layers with ILayer Normalization. Defaults to 1 * * @param gainInit Value to use for initializing gain */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java index c6f31faf3..4081930c9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java @@ -63,14 +63,14 @@ public class CapsuleLayer extends SameDiffLayer { this.routings = builder.routings; if(capsules <= 0 || capsuleDimensions <= 0 || routings <= 0){ - throw new IllegalArgumentException("Invalid configuration for Capsule Layer (layer name = \"" + throw new IllegalArgumentException("Invalid configuration for Capsule ILayer (layer name = \"" + layerName + "\"):" + " capsules, capsuleDimensions, and routings must be > 0. Got: " + capsules + ", " + capsuleDimensions + ", " + routings); } if(inputCapsules < 0 || inputCapsuleDimensions < 0){ - throw new IllegalArgumentException("Invalid configuration for Capsule Layer (layer name = \"" + throw new IllegalArgumentException("Invalid configuration for Capsule ILayer (layer name = \"" + layerName + "\"):" + " inputCapsules and inputCapsuleDimensions must be >= 0 if set. 
Got: " + inputCapsules + ", " + inputCapsuleDimensions); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java index d77f13e5c..1a6ce905c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java @@ -55,7 +55,7 @@ public class DenseLayer extends FeedForwardLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("DenseLayer", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerValidation.assertNInNOutSet("DenseLayerConfiguration", getLayerName(), layerIndex, getNIn(), getNOut()); org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret = new org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer(conf, networkDataType); @@ -101,7 +101,7 @@ public class DenseLayer extends FeedForwardLayer { return new LayerMemoryReport.Builder(layerName, DenseLayer.class, inputType, outputType) .standardMemory(numParams, updaterStateSize) .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer + .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayerConfiguration .build(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java index a96ec6db7..66f48dd14 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java @@ -205,7 +205,7 @@ public abstract class Layer implements TrainingConfig, Serializable, Cloneable { /** * Is the specified parameter a layerwise pretraining only parameter?
For example, visible * bias params in an autoencoder (or, decoder params in a variational autoencoder) aren't used - * during supervised backprop.
Layers (like DenseLayer, etc) with no pretrainable parameters + * during supervised backprop.
Layers (like DenseLayerConfiguration, etc) with no pretrainable parameters * will return false for all (valid) inputs. * * @param paramName Parameter name/key @@ -255,7 +255,7 @@ public abstract class Layer implements TrainingConfig, Serializable, Cloneable { protected IDropout iDropout; /** - * Layer name assigns layer string name. Allows easier differentiation between layers. + * ILayer name assigns layer string name. Allows easier differentiation between layers. */ public T name(String layerName) { this.setLayerName(layerName); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java index 2a5f16be6..571f884e3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java @@ -42,7 +42,7 @@ public class LayerValidation { /** * Asserts that the layer nIn and nOut values are set for the layer * - * @param layerType Type of layer ("DenseLayer", etc) + * @param layerType Type of layer ("DenseLayerConfiguration", etc) * @param layerName Name of the layer (may be null if not set) * @param layerIndex Index of the layer * @param nIn nIn value @@ -60,7 +60,7 @@ public class LayerValidation { /** * Asserts that the layer nOut value is set for the layer * - * @param layerType Type of layer ("DenseLayer", etc) + * @param layerType Type of layer ("DenseLayerConfiguration", etc) * @param layerName Name of the layer (may be null if not set) * @param layerIndex Index of the layer * @param nOut nOut value diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java index 8648a2814..98d7fa093 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java @@ -147,7 +147,7 @@ public class LocalResponseNormalization extends Layer { return new LayerMemoryReport.Builder(layerName, DenseLayer.class, inputType, inputType).standardMemory(0, 0) .workingMemory(0, 2 * actElementsPerEx, 0, 3 * actElementsPerEx) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer + .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayerConfiguration .build(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java index 2107bdede..4d3f56a84 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java @@ -87,7 +87,7 @@ public class PrimaryCapsules extends SameDiffLayer { } if(capsules < 0){ - throw new IllegalArgumentException("Invalid configuration for Capsule Layer (layer name = \"" + throw new IllegalArgumentException("Invalid configuration for Capsule ILayer (layer name = \"" + layerName + "\"):" + " capsules must be >= 0 if set. 
Got: " + capsules); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java index 79ab2ca54..9eea40cfc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java @@ -113,7 +113,7 @@ public class ElementWiseMultiplicationLayer extends org.deeplearning4j.nn.conf.l return new LayerMemoryReport.Builder(layerName, ElementWiseMultiplicationLayer.class, inputType, outputType) .standardMemory(numParams, updaterStateSize) .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer + .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayerConfiguration .build(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java index d6004f6bb..54a93b904 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java @@ -44,7 +44,7 @@ public class TimeDistributed extends BaseWrapperLayer { private RNNFormat rnnDataFormat = RNNFormat.NCW; /** - * @param underlying Underlying (internal) layer - should be a feed forward type such as DenseLayer + * @param underlying Underlying (internal) layer - should be a feed forward type such as DenseLayerConfiguration */ public TimeDistributed(@JsonProperty("underlying") @NonNull Layer underlying, @JsonProperty("rnnDataFormat") RNNFormat rnnDataFormat) { super(underlying); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java index 51cdb3b6f..0b68bf649 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java @@ -33,7 +33,7 @@ public abstract class SameDiffLambdaLayer extends SameDiffLayer { * The defineLayer method is used to define the forward pass for the layer * * @param sameDiff SameDiff instance to use to define the vertex - * @param layerInput Layer input variable + * @param layerInput ILayer input variable * @return The output variable (corresponding to the output activations for the layer) */ public abstract SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaVertex.java index d3c10ec2f..7ec4fb2d5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaVertex.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaVertex.java @@ -37,7 +37,7 @@ public abstract class SameDiffLambdaVertex extends SameDiffVertex { * The defineVertex method is used to define the foward pass for the vertex * * @param sameDiff SameDiff instance to use to define the vertex - * @param inputs Layer input variable + * @param inputs ILayer input variable * @return The output variable (orresponding to the output activations for the vertex) */ public abstract SDVariable defineVertex(SameDiff sameDiff, VertexInputs inputs); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BuildingBlockLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BuildingBlockLayer.java deleted file mode 100644 index e150b850f..000000000 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BuildingBlockLayer.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * - * ****************************************************************************** - * * - * * This program and the accompanying materials are made available under the - * * terms of the Apache License, Version 2.0 which is available at - * * https://www.apache.org/licenses/LICENSE-2.0. - * * - * * See the NOTICE file distributed with this work for additional - * * information regarding copyright ownership. - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * * License for the specific language governing permissions and limitations - * * under the License. - * * - * * SPDX-License-Identifier: Apache-2.0 - * ***************************************************************************** - * - */ - -package org.deeplearning4j.nn.conf.layers.wrapper; - -import java.util.Collection; -import lombok.AccessLevel; -import lombok.Builder; -import lombok.Getter; -import lombok.NonNull; -import net.brutex.ai.dnn.api.LayerConfiguration; -import net.brutex.ai.dnn.api.NeuralNetwork; -import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import net.brutex.ai.dnn.conf.NeuralNetworkConfiguration; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.BaseLayer; -import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; -import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.api.ndarray.INDArray; - -@Builder(builderClassName = "Builder", access = AccessLevel.PUBLIC) -public class BuildingBlockLayer extends BaseLayer implements LayerConfiguration { - - @NonNull - @Getter - private NeuralNetworkConfiguration conf; - - @Override - public Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - return null; - } - - @Override - public ParamInitializer initializer() { - return null; - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - return null; - } - - @Override - public void setNIn(InputType inputType, boolean override) { - - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return 
null; - } - - @Override - public boolean isPretrainParam(String paramName) { - return false; - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - return null; - } - - /** - * Create and return an instance of a LayerConfiguration. - * - * @param network the "holding" network for the instance - * @return the new layer instance - */ - @Override - public net.brutex.ai.dnn.api.Layer instantiate(NeuralNetwork network) { - return null; - } -} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/NetworkMemoryReport.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/NetworkMemoryReport.java index 9182ccfb9..d3f7b1955 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/NetworkMemoryReport.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/NetworkMemoryReport.java @@ -153,7 +153,7 @@ public class NetworkMemoryReport extends MemoryReport { .append(modelName).append("\n").append(" Network Input: ") .append(Arrays.toString(networkInputTypes)).append("\n") .append(" # Layers: ").append(layerAndVertexReports.size()) - .append("\n").append(" Layer Types: ").append(sbLayerCounts) + .append("\n").append(" ILayer Types: ").append(sbLayerCounts) .append("\n"); appendFixedPlusVariable(sb, " Inference Memory (FP32) ", fixedMemBytes, perEx); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java index 4c45b762f..c6c77d3d2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java @@ -33,7 +33,7 @@ public interface IWeightNoise extends Serializable, Cloneable{ /** * Get the parameter, after applying weight noise * - * @param layer Layer to get the parameter for + * @param layer ILayer to get the parameter for * @param paramKey Parameter key * @param iteration Iteration number * @param epoch Epoch number diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java index ac8a05be4..4a080bb28 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java @@ -25,6 +25,8 @@ import lombok.NonNull; import lombok.Setter; import lombok.extern.slf4j.Slf4j; import lombok.val; +import net.brutex.ai.dnn.api.INeuralNetwork; +import net.brutex.ai.dnn.networks.ArtificialNeuralNetwork; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.bytedeco.javacpp.Pointer; @@ -103,9 +105,16 @@ import java.util.*; import java.util.concurrent.atomic.AtomicLong; @Slf4j -public class ComputationGraph implements Serializable, Model, NeuralNetwork { +public class ComputationGraph extends ArtificialNeuralNetwork implements Serializable, Model, + INeuralNetwork { - protected ComputationGraphConfiguration configuration; + /** + * This method returns configuration of this ComputationGraph + * + * @return + */ + @Getter + protected ComputationGraphConfiguration computationGraphConfiguration; protected boolean initCalled = false; protected transient Solver solver; //Used to call optimizers during backprop 
protected INDArray flattenedParams; //Params for all layers are a view/subset of this array @@ -210,17 +219,17 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { private Collection trainingListeners = new ArrayList<>(); - public ComputationGraph(ComputationGraphConfiguration configuration) { - this.configuration = configuration; - this.numInputArrays = configuration.getNetworkInputs().size(); - this.numOutputArrays = configuration.getNetworkOutputs().size(); + public ComputationGraph(ComputationGraphConfiguration computationGraphConfiguration) { + this.computationGraphConfiguration = computationGraphConfiguration; + this.numInputArrays = computationGraphConfiguration.getNetworkInputs().size(); + this.numOutputArrays = computationGraphConfiguration.getNetworkOutputs().size(); this.inputs = new INDArray[numInputArrays]; this.labels = new INDArray[numOutputArrays]; - this.defaultConfiguration = configuration.getDefaultConfiguration(); + this.defaultConfiguration = computationGraphConfiguration.getDefaultConfiguration(); //Working memory: should learn over course of: (a) full forward pass, and (b) full backward pass //Working memory should be opened once per vertex, for each of forward and backward passes - int numWorkingMem = 2 * configuration.getVertices().size(); + int numWorkingMem = 2 * computationGraphConfiguration.getVertices().size(); WS_LAYER_WORKING_MEM_CONFIG = WorkspaceConfiguration.builder() .initialSize(0) .overallocationLimit(0.02) @@ -238,7 +247,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { .initialSize(0) .overallocationLimit(0.02) .policyLearning(LearningPolicy.OVER_TIME) - .cyclesBeforeInitialization(configuration.getVertices().size()) + .cyclesBeforeInitialization(computationGraphConfiguration.getVertices().size()) .policyReset(ResetPolicy.BLOCK_LEFT) .policySpill(SpillPolicy.REALLOCATE) .policyAllocation(AllocationPolicy.OVERALLOCATE) @@ -278,14 +287,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } } - /** - * This method returns configuration of this ComputationGraph - * - * @return - */ - public ComputationGraphConfiguration getConfiguration() { - return configuration; - } + /** * Returns the number of layers in the ComputationGraph @@ -313,7 +315,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { * Get a given layer by name. */ public Layer getLayer(String name) { - Preconditions.checkState(verticesMap.containsKey(name), "Layer with name %s does not exist in the network", name); + Preconditions.checkState(verticesMap.containsKey(name), "ILayer with name %s does not exist in the network", name); return verticesMap.get(name).getLayer(); } @@ -449,7 +451,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { if (initCalled) return; - DataType netDtype = getConfiguration().getDataType(); + DataType netDtype = this.getComputationGraphConfiguration().getDataType(); if(parameters != null && parameters.dataType() != netDtype){ Preconditions.checkState(parameters.rank() == 2 && parameters.size(0) == 1, "Invalid parameters array: should be rank 2 with shape [1,numParams]. 
Got %ndShape", parameters); if(cloneParametersArray){ @@ -463,31 +465,31 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } } - if (configuration.getTrainingWorkspaceMode() == null) - configuration.setTrainingWorkspaceMode(WorkspaceMode.NONE); + if (computationGraphConfiguration.getTrainingWorkspaceMode() == null) + computationGraphConfiguration.setTrainingWorkspaceMode(WorkspaceMode.NONE); - if (configuration.getInferenceWorkspaceMode() == null) - configuration.setInferenceWorkspaceMode(WorkspaceMode.NONE); + if (computationGraphConfiguration.getInferenceWorkspaceMode() == null) + computationGraphConfiguration.setInferenceWorkspaceMode(WorkspaceMode.NONE); - if (configuration.getCacheMode() == null) - configuration.setCacheMode(CacheMode.NONE); + if (computationGraphConfiguration.getCacheMode() == null) + computationGraphConfiguration.setCacheMode(CacheMode.NONE); OneTimeLogger.info(log, "Starting ComputationGraph with WorkspaceModes set to [training: {}; inference: {}], cacheMode set to [{}]", - configuration.getTrainingWorkspaceMode(), configuration.getInferenceWorkspaceMode(), configuration.getCacheMode()); + computationGraphConfiguration.getTrainingWorkspaceMode(), computationGraphConfiguration.getInferenceWorkspaceMode(), computationGraphConfiguration.getCacheMode()); //First: build topological ordering, based on configuration. Used for forward pass, backprop and order of parameters/gradients GraphIndices indices = calculateIndices(); topologicalOrder = indices.getTopologicalSortOrder(); //Initialization: create the GraphVertex objects, based on configuration structure - Map configVertexMap = configuration.getVertices(); + Map configVertexMap = computationGraphConfiguration.getVertices(); //Names of all of the (data) inputs to the ComputationGraph - List networkInputNames = configuration.getNetworkInputs(); + List networkInputNames = computationGraphConfiguration.getNetworkInputs(); //Inputs for each layer and GraphNode: - Map> vertexInputs = configuration.getVertexInputs(); - this.vertices = new GraphVertex[networkInputNames.size() + configuration.getVertices().size()]; + Map> vertexInputs = computationGraphConfiguration.getVertexInputs(); + this.vertices = new GraphVertex[networkInputNames.size() + computationGraphConfiguration.getVertices().size()]; //All names: inputs, layers and graph nodes (index to name map) Map allNamesReverse = new HashMap<>(); @@ -504,7 +506,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { long numParams = 0; long[] numParamsForVertex = new long[topologicalOrder.length]; int i = 0; - for (; i < configuration.getNetworkInputs().size(); i++) { + for (; i < computationGraphConfiguration.getNetworkInputs().size(); i++) { numParamsForVertex[i] = 0; //No parameters for input vertices } for(; i < topologicalOrder.length; i++) { @@ -513,7 +515,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { n.setDataType(netDtype); numParamsForVertex[i] = n.numParams(true); if(numParamsForVertex[i] < 0) - throw new DL4JInvalidConfigException("Layer " + name + " had parameters < 0 " + numParamsForVertex[i]); + throw new DL4JInvalidConfigException("ILayer " + name + " had parameters < 0 " + numParamsForVertex[i]); numParams += numParamsForVertex[i]; } @@ -564,7 +566,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { List tempLayerList = new ArrayList<>(); defaultConfiguration.clearVariables(); List variables = defaultConfiguration.variables(false); - i 
= configuration.getNetworkInputs().size(); + i = computationGraphConfiguration.getNetworkInputs().size(); for(; i> seenAsInputTo = new HashMap<>(); - for(Map.Entry> entry : configuration.getVertexInputs().entrySet()){ + for(Map.Entry> entry : computationGraphConfiguration.getVertexInputs().entrySet()){ for(String s : entry.getValue() ){ if (!seenAsInputTo.containsKey(s)) { seenAsInputTo.put(s, new ArrayList()); @@ -709,10 +711,10 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { for(Layer l : layers){ String layerName = l.conf().getLayer().getLayerName(); - List inputs = configuration.getVertexInputs().get(layerName); + List inputs = computationGraphConfiguration.getVertexInputs().get(layerName); String in = inputs.get(0); //For now: layers should have exactly 1 input - if(configuration.getNetworkInputs().contains(in)){ + if(computationGraphConfiguration.getNetworkInputs().contains(in)){ //TODO When is it safe to NOT allow input modifucation? It's not always safe... // For example dropout + iterating over List that is used for multiple epochs... continue; @@ -761,10 +763,10 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { long numParams = 0; long[] numParamsForVertex = new long[topologicalOrder.length]; int i = 0; - for (; i < configuration.getNetworkInputs().size(); i++) { + for (; i < computationGraphConfiguration.getNetworkInputs().size(); i++) { numParamsForVertex[i] = 0; //No parameters for input vertices } - Map configVertexMap = configuration.getVertices(); + Map configVertexMap = computationGraphConfiguration.getVertices(); for (; i < topologicalOrder.length; i++) { String name = indices.getIdxToName().get(i); org.deeplearning4j.nn.conf.graph.GraphVertex n = configVertexMap.get(name); @@ -796,7 +798,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { if(outputLayerIdxs == null) { outputLayerIdxs = new int[numOutputArrays]; int i = 0; - for (String s : configuration.getNetworkOutputs()) { + for (String s : computationGraphConfiguration.getNetworkOutputs()) { outputLayerIdxs[i++] = verticesMap.get(s).getVertexIndex(); } } @@ -875,7 +877,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { /** * Pretrain a specified layer with the given DataSetIterator * - * @param layerName Layer name + * @param layerName ILayer name * @param dataSetIterator Data */ public void pretrainLayer(String layerName, DataSetIterator dataSetIterator) { @@ -890,7 +892,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { /** * Pretrain a specified layer with the given MultiDataSetIterator * - * @param layerName Layer name + * @param layerName ILayer name * @param iter Training data */ public void pretrainLayer(String layerName, MultiDataSetIterator iter) { @@ -920,7 +922,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { int idx = toTrain.getVertexIndex(); LayerWorkspaceMgr workspaceMgr; - if(configuration.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ + if(computationGraphConfiguration.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { workspaceMgr = LayerWorkspaceMgr.builder() @@ -1133,7 +1135,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { update(TaskUtils.buildTask(inputs, labels)); LayerWorkspaceMgr workspaceMgr; - if(configuration.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ + 
if(computationGraphConfiguration.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { workspaceMgr = LayerWorkspaceMgr.builder() @@ -1151,7 +1153,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); - if (configuration.getBackpropType() == BackpropType.TruncatedBPTT) { + if (computationGraphConfiguration.getBackpropType() == BackpropType.TruncatedBPTT) { doTruncatedBPTT(inputs, labels, featureMaskArrays, labelMaskArrays, workspaceMgr); } else { if (solver == null) { @@ -1202,9 +1204,9 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { //Get cached topological sort order from config, if present - if(configuration.getTopologicalOrder() != null && configuration.getTopologicalOrderStr() != null){ - int[] t = configuration.getTopologicalOrder(); - List s = configuration.getTopologicalOrderStr(); + if(computationGraphConfiguration.getTopologicalOrder() != null && computationGraphConfiguration.getTopologicalOrderStr() != null){ + int[] t = computationGraphConfiguration.getTopologicalOrder(); + List s = computationGraphConfiguration.getTopologicalOrderStr(); Map m1 = new HashMap<>(); Map m2 = new HashMap<>(); for( int i=0; i nodeMap = configuration.getVertices(); - List networkInputNames = configuration.getNetworkInputs(); - int numVertices = networkInputNames.size() + configuration.getVertices().size(); + Map nodeMap = computationGraphConfiguration.getVertices(); + List networkInputNames = computationGraphConfiguration.getNetworkInputs(); + int numVertices = networkInputNames.size() + computationGraphConfiguration.getVertices().size(); int[] out = new int[numVertices]; int outCounter = 0; @@ -1233,7 +1235,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { Map vertexNamesMap = new HashMap<>(); Map vertexNamesMap2 = new HashMap<>(); int i = 0; - for (String inputName : configuration.getNetworkInputs()) { + for (String inputName : computationGraphConfiguration.getNetworkInputs()) { vertexNamesMap.put(i, inputName); vertexNamesMap2.put(inputName, i); i++; @@ -1248,7 +1250,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { Map> inputEdges = new HashMap<>(); //key: vertex. Values: vertices that the key vertex receives input from Map> outputEdges = new HashMap<>(); //key: vertex. 
Values: vertices that the key vertex outputs to - for (String s : configuration.getNetworkInputs()) { + for (String s : computationGraphConfiguration.getNetworkInputs()) { int idx = vertexNamesMap2.get(s); inputEdges.put(idx, null); } @@ -1256,7 +1258,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { for (Map.Entry entry : nodeMap.entrySet()) { String thisVertexName = entry.getKey(); int idx = vertexNamesMap2.get(thisVertexName); - List inputsToThisVertex = configuration.getVertexInputs().get(thisVertexName); + List inputsToThisVertex = computationGraphConfiguration.getVertexInputs().get(thisVertexName); if (inputsToThisVertex == null || inputsToThisVertex.isEmpty()) { inputEdges.put(idx, null); @@ -1324,8 +1326,8 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { for( int idx : out){ s.add(vertexNamesMap.get(idx)); } - configuration.setTopologicalOrder(out); - configuration.setTopologicalOrderStr(s); + computationGraphConfiguration.setTopologicalOrder(out); + computationGraphConfiguration.setTopologicalOrderStr(s); graphIndices = GraphIndices.builder() .topologicalSortOrder(out) @@ -1344,7 +1346,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { synchronizeIterEpochCounts(); LayerWorkspaceMgr workspaceMgr; - if(configuration.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ + if(computationGraphConfiguration.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { workspaceMgr = LayerWorkspaceMgr.builder() @@ -1362,7 +1364,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); - boolean tbptt = configuration.getBackpropType() == BackpropType.TruncatedBPTT; + boolean tbptt = computationGraphConfiguration.getBackpropType() == BackpropType.TruncatedBPTT; FwdPassType fwdType = (tbptt ? FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE : FwdPassType.STANDARD); synchronizeIterEpochCounts(); @@ -1386,7 +1388,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { score = 0.0; int outNum = 0; - for (String s : configuration.getNetworkOutputs()) { + for (String s : computationGraphConfiguration.getNetworkOutputs()) { GraphVertex gv = verticesMap.get(s); if(gv instanceof LayerVertex) { //At this point: the input to the output layer might not be set on the layer itself - just the vertex @@ -1863,7 +1865,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { int[] layerNums = new int[layers.size()]; for( int i=0; i freeWorkspaceManagers = new ArrayList<>(); //Basically used as a stack Map openActivationsWorkspaces = new IdentityHashMap<>(); - WorkspaceMode wsm = (train ? configuration.getTrainingWorkspaceMode() : configuration.getInferenceWorkspaceMode()); + WorkspaceMode wsm = (train ? computationGraphConfiguration.getTrainingWorkspaceMode() : computationGraphConfiguration.getInferenceWorkspaceMode()); boolean noWS = wsm == WorkspaceMode.NONE; LayerWorkspaceMgr allNone = noWS ? 
LayerWorkspaceMgr.noWorkspaces(helperWorkspaces) : null; List[] closeAtEndIteraton = (List[])new List[topologicalOrder.length]; @@ -2438,7 +2440,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } } else if (fwdPassType == FwdPassType.RNN_TIMESTEP) { if (current.hasLayer()) { - //Layer + //ILayer INDArray input = current.getInputs()[0]; Layer l = current.getLayer(); if (l instanceof RecurrentLayer) { @@ -2562,7 +2564,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { try { - calcBackpropGradients(true, configuration.getBackpropType() == BackpropType.TruncatedBPTT, epsilons); + calcBackpropGradients(true, computationGraphConfiguration.getBackpropType() == BackpropType.TruncatedBPTT, epsilons); return gradient; } catch (OutOfMemoryError e){ CrashReportingUtil.writeMemoryCrashDump(this, e); @@ -2595,19 +2597,19 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { consumed by all layers */ - if(externalEpsilons == null || externalEpsilons.length == 0 && configuration.getTrainingWorkspaceMode() != WorkspaceMode.NONE){ + if(externalEpsilons == null || externalEpsilons.length == 0 && computationGraphConfiguration.getTrainingWorkspaceMode() != WorkspaceMode.NONE){ WorkspaceUtils.assertOpenAndActive(WS_ALL_LAYERS_ACT, "Expected workspace WS_ALL_LAYERS_ACT to be active and open" + " in calcBackpropGradients when workspace mode is not set to NONE"); } //Validate the network configuration for external errors - no output layers if(externalEpsilons != null && externalEpsilons.length > 0){ - List outputLayers = configuration.getNetworkOutputs(); + List outputLayers = computationGraphConfiguration.getNetworkOutputs(); for(String s : outputLayers ){ GraphVertex gv = getVertex(s); if(gv instanceof LayerVertex && gv.getLayer() instanceof IOutputLayer){ throw new IllegalStateException("Cannot perform backprop with external errors in conjunction with an output layer:" + - " output layers cannot use external errors for backprop. Layer name: " + s); + " output layers cannot use external errors for backprop. ILayer name: " + s); } } @@ -2643,7 +2645,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } - boolean noWS = configuration.getInferenceWorkspaceMode() == WorkspaceMode.NONE; + boolean noWS = computationGraphConfiguration.getInferenceWorkspaceMode() == WorkspaceMode.NONE; LayerWorkspaceMgr allNone = noWS ? 
LayerWorkspaceMgr.noWorkspaces(helperWorkspaces) : null; List allWorkspaceManagers = new ArrayList<>(); @@ -2722,7 +2724,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { //(a) it's an output layer (i.e., instanceof IOutputLayer), or //(b) it's a normal layer, but it has been marked as an output layer for use in external errors - for reinforcement learning, for example - int thisOutputNumber = configuration.getNetworkOutputs().indexOf(current.getVertexName()); + int thisOutputNumber = computationGraphConfiguration.getNetworkOutputs().indexOf(current.getVertexName()); Layer currentLayer = current.getLayer(); if (currentLayer instanceof FrozenLayerWithBackprop) { currentLayer = ((FrozenLayerWithBackprop) currentLayer).getInsideLayer(); @@ -2735,7 +2737,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } else { if ((externalEpsilons == null || externalEpsilons.length == 0) && labels[thisOutputNumber] != null) { - throw new DL4JException("Layer \"" + current.getVertexName() + "\" of type " + throw new DL4JException("ILayer \"" + current.getVertexName() + "\" of type " + current.getLayer().getClass().getSimpleName() + " is set as network output " + "(but isn't an IOutputLayer). Only IOutputLayer layers can be fit via backprop with" @@ -2882,7 +2884,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { @Override public ComputationGraph clone() { - ComputationGraph cg = new ComputationGraph(configuration.clone()); + ComputationGraph cg = new ComputationGraph(computationGraphConfiguration.clone()); cg.init(params().dup(), false); if (solver != null) { //If solver is null: updater hasn't been initialized -> getUpdater call will force initialization, however @@ -3019,7 +3021,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { if (outputLayerIdx >= numOutputArrays) throw new IllegalArgumentException("Invalid index: cannot get output layer " + outputLayerIdx + ", total number of network outputs = " + numOutputArrays); - return getLayer(configuration.getNetworkOutputs().get(outputLayerIdx)); + return getLayer(computationGraphConfiguration.getNetworkOutputs().get(outputLayerIdx)); } /** @@ -3086,7 +3088,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { private double scoreHelper(MultiDataSet dataSet, boolean training){ LayerWorkspaceMgr mgr; - WorkspaceMode wsm = (training ? configuration.getTrainingWorkspaceMode() : configuration.getInferenceWorkspaceMode()); + WorkspaceMode wsm = (training ? 
computationGraphConfiguration.getTrainingWorkspaceMode() : computationGraphConfiguration.getInferenceWorkspaceMode()); if(wsm == WorkspaceMode.NONE){ mgr = LayerWorkspaceMgr.noWorkspaces(); } else { @@ -3120,7 +3122,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { double r = calcRegularizationScore(true); int i = 0; - for (String s : configuration.getNetworkOutputs()) { + for (String s : computationGraphConfiguration.getNetworkOutputs()) { GraphVertex gv = verticesMap.get(s); Layer outLayer = gv.getLayer(); if (outLayer == null || !(outLayer instanceof IOutputLayer)) { @@ -3180,7 +3182,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { private INDArray scoreExamplesHelper(MultiDataSet dataSet, boolean addRegularizationTerms){ LayerWorkspaceMgr mgr; - if(configuration.getInferenceWorkspaceMode() == WorkspaceMode.NONE){ + if(computationGraphConfiguration.getInferenceWorkspaceMode() == WorkspaceMode.NONE){ mgr = LayerWorkspaceMgr.noWorkspaces(); } else { mgr = LayerWorkspaceMgr.builder() @@ -3212,7 +3214,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { double r = (addRegularizationTerms ? calcRegularizationScore(true) : 0.0); int i = 0; - for (String s : configuration.getNetworkOutputs()) { + for (String s : computationGraphConfiguration.getNetworkOutputs()) { GraphVertex gv = verticesMap.get(s); Layer outLayer = gv.getLayer(); if (outLayer == null || !(outLayer instanceof IOutputLayer)) { @@ -3640,7 +3642,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } if (l == null || !(l instanceof RecurrentLayer)) { throw new UnsupportedOperationException( - "Layer \"" + layerName + "\" is not a recurrent layer. Cannot set state"); + "ILayer \"" + layerName + "\" is not a recurrent layer. Cannot set state"); } ((RecurrentLayer) l).rnnSetPreviousState(state); } @@ -3704,7 +3706,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } } - long fwdLen = configuration.getTbpttFwdLength(); + long fwdLen = computationGraphConfiguration.getTbpttFwdLength(); long nSubsets = timeSeriesLength / fwdLen; if (timeSeriesLength % fwdLen != 0) nSubsets++; @@ -3882,7 +3884,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { // This output doesn't have a mask, we can skip it. 
continue; } - String outputName = configuration.getNetworkOutputs().get(i); + String outputName = computationGraphConfiguration.getNetworkOutputs().get(i); GraphVertex v = verticesMap.get(outputName); Layer ol = v.getLayer(); ol.setMaskArray(labelMaskArrays[i]); @@ -3972,7 +3974,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { labelsList = iterator.getLabels(); Layer outputLayer = getOutputLayer(0); - if(getConfiguration().isValidateOutputLayerConfig()){ + if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), Evaluation.class); } @@ -3990,7 +3992,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { */ public T evaluate(MultiDataSetIterator iterator, List labelsList, int topN) { Layer outputLayer = getOutputLayer(0); - if(getConfiguration().isValidateOutputLayerConfig()){ + if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), Evaluation.class); } return (T)doEvaluation(iterator, new org.deeplearning4j.eval.Evaluation(labelsList, topN))[0]; @@ -4055,7 +4057,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { */ public T evaluateROC(DataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(0); - if(getConfiguration().isValidateOutputLayerConfig()){ + if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), ROC.class); } return (T)doEvaluation(iterator, new org.deeplearning4j.eval.ROC(rocThresholdSteps))[0]; @@ -4078,7 +4080,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { */ public T evaluateROC(MultiDataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(0); - if(getConfiguration().isValidateOutputLayerConfig()){ + if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), ROC.class); } return (T)doEvaluation(iterator, new org.deeplearning4j.eval.ROC(rocThresholdSteps))[0]; @@ -4101,7 +4103,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { */ public T evaluateROCMultiClass(DataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(0); - if(getConfiguration().isValidateOutputLayerConfig()){ + if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), ROCMultiClass.class); } return (T)doEvaluation(iterator, new org.deeplearning4j.eval.ROCMultiClass(rocThresholdSteps))[0]; @@ -4116,7 +4118,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { */ public T evaluateROCMultiClass(MultiDataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(0); - if(getConfiguration().isValidateOutputLayerConfig()){ + if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), ROCMultiClass.class); } return (T)doEvaluation(iterator, new org.deeplearning4j.eval.ROCMultiClass(rocThresholdSteps))[0]; @@ -4202,13 +4204,13 @@ public class ComputationGraph implements Serializable, 
Model, NeuralNetwork { MultiDataSetIterator iter = iterator.asyncSupported() ? new AsyncMultiDataSetIterator(iterator, 2, true) : iterator; - WorkspaceMode cMode = configuration.getTrainingWorkspaceMode(); - configuration.setTrainingWorkspaceMode(configuration.getInferenceWorkspaceMode()); + WorkspaceMode cMode = computationGraphConfiguration.getTrainingWorkspaceMode(); + computationGraphConfiguration.setTrainingWorkspaceMode(computationGraphConfiguration.getInferenceWorkspaceMode()); - boolean useRnnSegments = (configuration.getBackpropType() == BackpropType.TruncatedBPTT); + boolean useRnnSegments = (computationGraphConfiguration.getBackpropType() == BackpropType.TruncatedBPTT); MemoryWorkspace outputWs; - if(getConfiguration().getInferenceWorkspaceMode() == WorkspaceMode.ENABLED){ + if(this.getComputationGraphConfiguration().getInferenceWorkspaceMode() == WorkspaceMode.ENABLED){ outputWs = Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM); } else { outputWs = new DummyWorkspace(); @@ -4256,7 +4258,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } else { rnnClearPreviousState(); - int fwdLen = configuration.getTbpttFwdLength(); + int fwdLen = computationGraphConfiguration.getTbpttFwdLength(); long tsLength = -1; long nF = next.getFeatures().length; for (int i = 0; i < nF; i++) { @@ -4309,7 +4311,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { if (iterator.asyncSupported()) ((AsyncMultiDataSetIterator) iter).shutdown(); - configuration.setTrainingWorkspaceMode(cMode); + computationGraphConfiguration.setTrainingWorkspaceMode(cMode); return evaluations; } @@ -4380,9 +4382,9 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { String out = "-"; String paramShape = "-"; if (currentVertex.isInputVertex()) { - if (inputTypes != null) vertexOutputs.put(currentVertexName, inputTypes[configuration.getNetworkInputs().indexOf(currentVertexName)]); //for input vertices the outputs are just the input types (only layer vertices have preprocessing?) + if (inputTypes != null) vertexOutputs.put(currentVertexName, inputTypes[computationGraphConfiguration.getNetworkInputs().indexOf(currentVertexName)]); //for input vertices the outputs are just the input types (only layer vertices have preprocessing?) 
} else { - connections = configuration.getVertexInputs().get(currentVertexName).toString(); + connections = computationGraphConfiguration.getVertexInputs().get(currentVertexName).toString(); List inputTypeList = new ArrayList<>(); if (currentVertex.hasLayer()) { Layer currentLayer = currentVertex.getLayer(); @@ -4425,7 +4427,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { inShape = currentInType.toString(); inputTypeList.add(currentInType); - InputPreProcessor layerVertexPreProcesor = ((org.deeplearning4j.nn.conf.graph.LayerVertex)configuration.getVertices().get(currentVertexName)).getPreProcessor(); + InputPreProcessor layerVertexPreProcesor = ((org.deeplearning4j.nn.conf.graph.LayerVertex) computationGraphConfiguration.getVertices().get(currentVertexName)).getPreProcessor(); if (layerVertexPreProcesor != null) { inShape += "-->" + layerVertexPreProcesor.getOutputType(currentInType); } @@ -4444,7 +4446,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } } if (inputTypes != null) { - InputType currentVertexOutputType = configuration.getVertices().get(currentVertexName).getOutputType(currLayerIdx, inputTypeList.toArray(new InputType[inputTypeList.size()])); + InputType currentVertexOutputType = computationGraphConfiguration.getVertices().get(currentVertexName).getOutputType(currLayerIdx, inputTypeList.toArray(new InputType[inputTypeList.size()])); outShape = currentVertexOutputType.toString(); vertexOutputs.put(currentVertexName, currentVertexOutputType); } @@ -4546,14 +4548,14 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { * The current epoch count can be obtained using {@code ComputationGraph.getConfiguration().getEpochCount()} */ public void incrementEpochCount(){ - configuration.setEpochCount(configuration.getEpochCount() + 1); + computationGraphConfiguration.setEpochCount(computationGraphConfiguration.getEpochCount() + 1); synchronizeIterEpochCounts(); } protected void synchronizeIterEpochCounts(){ //TODO: this is necessrry for some schedules - but the redundant values are a little ugly... 
- int currIter = getConfiguration().getIterationCount(); - int currEpoch = getConfiguration().getEpochCount(); + int currIter = this.getComputationGraphConfiguration().getIterationCount(); + int currEpoch = this.getComputationGraphConfiguration().getEpochCount(); for(Layer l : layers){ l.setIterationCount(currIter); l.setEpochCount(currEpoch); @@ -4565,7 +4567,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { * @return Number of iterations */ public int getIterationCount(){ - return configuration.getIterationCount(); + return computationGraphConfiguration.getIterationCount(); } /** @@ -4576,7 +4578,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { * @return Number of epochs */ public int getEpochCount(){ - return configuration.getEpochCount(); + return computationGraphConfiguration.getEpochCount(); } /** @@ -4633,7 +4635,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { try(MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { INDArray newParams = params().castTo(dataType); - String jsonConfig = getConfiguration().toJson(); + String jsonConfig = this.getComputationGraphConfiguration().toJson(); ComputationGraphConfiguration newConf = ComputationGraphConfiguration.fromJson(jsonConfig); newConf.setDataType(dataType); ComputationGraph newNet = new ComputationGraph(newConf); @@ -4714,7 +4716,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { /** * Get the current learning rate, for the specified layer, from the network. * Note: If the layer has no learning rate (no parameters, or an updater without a learning rate) then null is returned - * @param layerName Layer name + * @param layerName ILayer name * @return Learning rate for the specified layer, or null */ public Double getLearningRate(String layerName){ @@ -4724,7 +4726,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { /** * Return the layer size (number of units) for the specified layer. * Note that the meaning of the "layer size" can depend on the type of layer. For example:
- * - DenseLayer, OutputLayer, recurrent layers: number of units (nOut configuration option)
+ * - DenseLayerConfiguration, OutputLayer, recurrent layers: number of units (nOut configuration option)
* - ConvolutionLayer: the channels (number of channels)
* - Subsampling layers, global pooling layers, etc: size of 0 is always returned
* @@ -4733,7 +4735,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { */ public long layerSize(int layer) { if (layer < 0 || layer > layers.length) { - throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and " + throw new IllegalArgumentException("Invalid layer index: " + layer + ". ILayer index must be between 0 and " + (layers.length - 1) + " inclusive"); } return layerSize(layers[layer].conf().getLayer().getLayerName()); @@ -4742,7 +4744,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { /** * Return the input size (number of inputs) for the specified layer.
* Note that the meaning of the "input size" can depend on the type of layer. For example:
- * - DenseLayer, OutputLayer, etc: the feature vector size (nIn configuration option)
+ * - DenseLayerConfiguration, OutputLayer, etc: the feature vector size (nIn configuration option)
* - Recurrent layers: the feature vector size per time step (nIn configuration option)
* - ConvolutionLayer: the channels (number of channels)
* - Subsampling layers, global pooling layers, etc: size of 0 is always returned
@@ -4752,7 +4754,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { */ public long layerInputSize(int layer) { if (layer < 0 || layer > layers.length) { - throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and " + throw new IllegalArgumentException("Invalid layer index: " + layer + ". ILayer index must be between 0 and " + (layers.length - 1) + " inclusive"); } return layerInputSize(layers[layer].conf().getLayer().getLayerName()); @@ -4761,7 +4763,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { /** * Return the layer size (number of units) for the specified layer.
* Note that the meaning of the "layer size" can depend on the type of layer. For example:
- * - DenseLayer, OutputLayer, recurrent layers: number of units (nOut configuration option)
+ * - DenseLayerConfiguration, OutputLayer, recurrent layers: number of units (nOut configuration option)
* - ConvolutionLayer: the channels (number of channels)
* - Subsampling layers, global pooling layers, etc: size of 0 is always returned
* @@ -4785,7 +4787,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { /** * Return the input size (number of inputs) for the specified layer.
* Note that the meaning of the "input size" can depend on the type of layer. For example:
- * - DenseLayer, OutputLayer, etc: the feature vector size (nIn configuration option)
+ * - DenseLayerConfiguration, OutputLayer, etc: the feature vector size (nIn configuration option)
* - Recurrent layers: the feature vector size per time step (nIn configuration option)
* - ConvolutionLayer: the channels (number of channels)
* - Subsampling layers, global pooling layers, etc: size of 0 is always returned
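A minimal sketch of how these size accessors read the configured values, assuming the stock DL4J graph-builder API (which this patch is in the middle of renaming) and a made-up network with one hidden DenseLayer named "dense" (nIn = 784, nOut = 100); the class name below is hypothetical and only for illustration:

    import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.deeplearning4j.nn.graph.ComputationGraph;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    public class LayerSizeExample {   // hypothetical name, illustration only
        public static void main(String[] args) {
            ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                    .graphBuilder()
                    .addInputs("in")
                    .addLayer("dense", new DenseLayer.Builder()
                            .nIn(784).nOut(100).activation(Activation.RELU).build(), "in")
                    .addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                            .nIn(100).nOut(10).activation(Activation.SOFTMAX).build(), "dense")
                    .setOutputs("out")
                    .build();

            ComputationGraph graph = new ComputationGraph(conf);
            graph.init();

            long units  = graph.layerSize("dense");      // 100 -> nOut of the dense layer
            long inputs = graph.layerInputSize("dense"); // 784 -> nIn of the dense layer
            System.out.println(units + " units, " + inputs + " inputs");
        }
    }

The int-indexed overloads (layerSize(0), layerInputSize(0)) behave the same way; the index refers to the layer's position in the network's layer array rather than its name.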
@@ -4860,7 +4862,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { if (obj instanceof ComputationGraph) { ComputationGraph network = (ComputationGraph) obj; boolean paramsEquals = network.params().equals(params()); - boolean confEquals = getConfiguration().equals(network.getConfiguration()); + boolean confEquals = this.getComputationGraphConfiguration().equals(network.getComputationGraphConfiguration()); boolean updaterEquals = getUpdater().equals(network.getUpdater()); return paramsEquals && confEquals && updaterEquals; } @@ -4875,7 +4877,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { val cg = ModelSerializer.restoreComputationGraph(ois, true); this.defaultConfiguration = cg.defaultConfiguration.clone(); - this.configuration = cg.configuration.clone(); + this.computationGraphConfiguration = cg.computationGraphConfiguration.clone(); this.init(); this.flattenedParams.assign(cg.flattenedParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java index afffe99d4..cdb124d75 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java @@ -58,8 +58,8 @@ public abstract class BaseGraphVertex implements GraphVertex { protected INDArray[] inputs; protected INDArray epsilon; - //Set outputVertex to true when Layer is an OutputLayer, OR For use in specialized situations like reinforcement learning - // For RL situations, this Layer insn't an OutputLayer, but is the last layer in a graph, that gets its error/epsilon + //Set outputVertex to true when ILayer is an OutputLayer, OR For use in specialized situations like reinforcement learning + // For RL situations, this ILayer insn't an OutputLayer, but is the last layer in a graph, that gets its error/epsilon // passed in externally @Setter @Getter protected boolean outputVertex; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java index 73e4b2fc4..61136e0db 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java @@ -40,7 +40,7 @@ public interface GraphVertex extends Trainable, Serializable { /** Get the index of the GraphVertex */ int getVertexIndex(); - /** Get the number of input arrays. For example, a Layer may have only one input array, but in general a GraphVertex + /** Get the number of input arrays. For example, a ILayer may have only one input array, but in general a GraphVertex * may have an arbtrary (>=1) number of input arrays (for example, from multiple other layers) */ int getNumInputArrays(); @@ -85,7 +85,7 @@ public interface GraphVertex extends Trainable, Serializable { /** Set the GraphVertex to be an output vertex */ void setOutputVertex(boolean outputVertex); - /** Get the Layer (if any). Returns null if {@link #hasLayer()} == false */ + /** Get the ILayer (if any). Returns null if {@link #hasLayer()} == false */ Layer getLayer(); /** Set the input activations. 
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java index fdd05c390..60f3dad0b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java @@ -124,10 +124,10 @@ public class LayerVertex extends BaseGraphVertex { public Pair doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) { if (!canDoBackward()) { if(inputs == null || inputs[0] == null){ - throw new IllegalStateException("Cannot do backward pass: inputs not set. Layer: \"" + vertexName + throw new IllegalStateException("Cannot do backward pass: inputs not set. ILayer: \"" + vertexName + "\" (idx " + vertexIndex + "), numInputs: " + getNumInputArrays()); } else { - throw new IllegalStateException("Cannot do backward pass: all epsilons not set. Layer \"" + vertexName + throw new IllegalStateException("Cannot do backward pass: all epsilons not set. ILayer \"" + vertexName + "\" (idx " + vertexIndex + "), numInputs :" + getNumInputArrays() + "; numOutputs: " + getNumOutputConnections()); } @@ -142,7 +142,7 @@ public class LayerVertex extends BaseGraphVertex { if (tbptt && layer instanceof RecurrentLayer) { //Truncated BPTT for recurrent layers pair = ((RecurrentLayer) layer).tbpttBackpropGradient(epsilon, - graph.getConfiguration().getTbpttBackLength(), workspaceMgr); + graph.getComputationGraphConfiguration().getTbpttBackLength(), workspaceMgr); } else { //Normal backprop pair = layer.backpropGradient(epsilon, workspaceMgr); //epsTotal may be null for OutputLayers diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java index 2bfc6ee97..27eb238d3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java @@ -48,10 +48,10 @@ public class DuplicateToTimeSeriesVertex extends BaseGraphVertex { VertexIndices[] inputVertices, VertexIndices[] outputVertices, String inputName, DataType dataType) { super(graph, name, vertexIndex, inputVertices, outputVertices, dataType); this.inputName = inputName; - this.inputVertexIndex = graph.getConfiguration().getNetworkInputs().indexOf(inputName); + this.inputVertexIndex = graph.getComputationGraphConfiguration().getNetworkInputs().indexOf(inputName); if (inputVertexIndex == -1) throw new IllegalArgumentException("Invalid input name: \"" + inputName + "\" not found in list " - + "of network inputs (" + graph.getConfiguration().getNetworkInputs() + ")"); + + "of network inputs (" + graph.getComputationGraphConfiguration().getNetworkInputs() + ")"); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java index 0475936d0..4402dc4c5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java @@ -54,10 
+54,10 @@ public class LastTimeStepVertex extends BaseGraphVertex { VertexIndices[] outputVertices, String inputName, DataType dataType) { super(graph, name, vertexIndex, inputVertices, outputVertices, dataType); this.inputName = inputName; - this.inputIdx = graph.getConfiguration().getNetworkInputs().indexOf(inputName); + this.inputIdx = graph.getComputationGraphConfiguration().getNetworkInputs().indexOf(inputName); if (inputIdx == -1) throw new IllegalArgumentException("Invalid input name: \"" + inputName + "\" not found in list " - + "of network inputs (" + graph.getConfiguration().getNetworkInputs() + ")"); + + "of network inputs (" + graph.getComputationGraphConfiguration().getNetworkInputs() + ")"); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/ReverseTimeSeriesVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/ReverseTimeSeriesVertex.java index 359a576a3..86b5dcab3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/ReverseTimeSeriesVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/ReverseTimeSeriesVertex.java @@ -48,10 +48,10 @@ public class ReverseTimeSeriesVertex extends BaseGraphVertex { this.inputIdx = -1; } else { // Find the given input - this.inputIdx = graph.getConfiguration().getNetworkInputs().indexOf(inputName); + this.inputIdx = graph.getComputationGraphConfiguration().getNetworkInputs().indexOf(inputName); if (inputIdx == -1) throw new IllegalArgumentException("Invalid input name: \"" + inputName + "\" not found in list " - + "of network inputs (" + graph.getConfiguration().getNetworkInputs() + ")"); + + "of network inputs (" + graph.getComputationGraphConfiguration().getNetworkInputs() + ")"); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java index 3ad4f8b0a..fa03d3c51 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java @@ -79,7 +79,7 @@ public class LSTMHelpers { ) { //Mini-batch data format: for mini-batch size m, nIn inputs, and T time series length - //Data has shape [m,nIn,T]. Layer activations/output has shape [m,nHiddenUnits,T] + //Data has shape [m,nIn,T]. 
ILayer activations/output has shape [m,nHiddenUnits,T] if (input == null || input.length() == 0) throw new IllegalArgumentException("Invalid input: not set or 0 length"); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index 18397bd4d..0f81392f9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -26,6 +26,8 @@ import lombok.NonNull; import lombok.Setter; import lombok.extern.slf4j.Slf4j; import lombok.val; +import net.brutex.ai.dnn.api.INeuralNetwork; +import net.brutex.ai.dnn.networks.ArtificialNeuralNetwork; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.bytedeco.javacpp.Pointer; @@ -38,9 +40,7 @@ import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.api.layers.RecurrentLayer; import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; -import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -99,1097 +99,1235 @@ import org.nd4j.common.util.OneTimeLogger; import java.io.*; import java.util.*; - +/** + * Artificial Neural Network An artificial neural network (1) takes some input data, and (2) + * transforms this input data by calculating a weighted sum over the inputs and (3) applies a + * non-linear function to this transformation to calculate an intermediate state. The three steps + * above constitute what is known as a layer, and the transformative function is often referred to + * as a unit. The intermediate states—often termed features—are used as the input into another + * layer. + *
+ * Through repetition of these steps, the artificial neural network learns multiple layers of
+ * non-linear features, which it then combines in a final layer to create a prediction.
+ *
+ * The neural network learns by generating an error signal that measures the difference between the + * predictions of the network and the desired values and then using this error signal to change the + * weights (or parameters) so that predictions get more accurate. + */ @Slf4j -public class MultiLayerNetwork implements Serializable, Classifier, Layer, org.deeplearning4j.nn.api.NeuralNetwork { +public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serializable, Classifier, Layer, + INeuralNetwork { - //the hidden neural network layers (including output layer) - protected Layer[] layers; - protected LinkedHashMap layerMap = new LinkedHashMap<>(); - - //Current training data: input features and labels - protected INDArray input, labels; - - protected boolean initCalled = false; - protected Collection trainingListeners = new ArrayList<>(); - - protected NeuralNetConfiguration defaultConfiguration; - protected MultiLayerConfiguration layerWiseConfigurations; - protected Gradient gradient; - protected double score; - @Setter - protected boolean initDone = false; - protected INDArray flattenedParams; //Params for all layers are a view/subset of this array - @Getter - protected transient INDArray flattenedGradients; //Gradients for all layers are a view/subset of this array - - protected boolean clearTbpttState = true; //Mainly for unit testing (should be enabled otherwise) - protected transient ThreadLocal lastEtlTime = new ThreadLocal<>(); - protected INDArray mask; - - protected int layerIndex; //For Layer.get/setIndex() - - protected transient Solver solver; //Used to call optimizers during backprop - //Workspaces for CUDNN. Pass to LayerWorkspaceMgr for re-use in cudnn helpers - @Getter - protected transient Map helperWorkspaces = new HashMap<>(); + /** + * Workspace for working memory for a single layer: forward pass and backward pass Note that this + * is opened/closed once per op (activate/backpropGradient call) + */ + protected static final String WS_LAYER_WORKING_MEM = "WS_LAYER_WORKING_MEM"; + /** + * Workspace for storing all layers' activations - used only to store activations (layer inputs) + * as part of backprop Not used for inference + */ + protected static final String WS_ALL_LAYERS_ACT = "WS_ALL_LAYERS_ACT"; + /** + * Next 2 workspaces: used for: (a) Inference: holds activations for one layer only (b) Backprop: + * holds activation gradients for one layer only In both cases, they are opened and closed on + * every second layer + */ + protected static final String WS_LAYER_ACT_1 = "WS_LAYER_ACT_1"; + protected static final String WS_LAYER_ACT_2 = "WS_LAYER_ACT_2"; + /** + * Workspace for output methods that use OutputAdapter + */ + protected static final String WS_OUTPUT_MEM = "WS_OUTPUT_MEM"; + /** + * Workspace for working memory in RNNs - opened and closed once per RNN time step + */ + protected static final String WS_RNN_LOOP_WORKING_MEM = "WS_RNN_LOOP_WORKING_MEM"; + protected static final WorkspaceConfiguration WS_ALL_LAYERS_ACT_CONFIG = WorkspaceConfiguration.builder() + .initialSize(0) + .overallocationLimit(0.05) + .policyLearning(LearningPolicy.FIRST_LOOP) + .policyReset(ResetPolicy.BLOCK_LEFT) + .policySpill(SpillPolicy.REALLOCATE) + .policyAllocation(AllocationPolicy.OVERALLOCATE) + .build(); + protected static final WorkspaceConfiguration WS_RNN_LOOP_WORKING_MEM_CONFIG = WorkspaceConfiguration.builder() + .initialSize(0).overallocationLimit(0.05).policyReset(ResetPolicy.BLOCK_LEFT) + 
.policyAllocation(AllocationPolicy.OVERALLOCATE).policySpill(SpillPolicy.REALLOCATE) + .policyLearning(LearningPolicy.FIRST_LOOP).build(); + //the hidden neural network layers (including output layer) + protected Layer[] layers; + protected LinkedHashMap layerMap = new LinkedHashMap<>(); + //Current training data: input features and labels + protected INDArray input, labels; + protected boolean initCalled = false; + protected Collection trainingListeners = new ArrayList<>(); + protected NeuralNetConfiguration defaultConfiguration; + protected MultiLayerConfiguration layerWiseConfigurations; + protected Gradient gradient; + protected double score; + @Setter + protected boolean initDone = false; + protected INDArray flattenedParams; //Params for all layers are a view/subset of this array + @Getter + protected transient INDArray flattenedGradients; //Gradients for all layers are a view/subset of this array + protected boolean clearTbpttState = true; //Mainly for unit testing (should be enabled otherwise) + protected transient ThreadLocal lastEtlTime = new ThreadLocal<>(); + protected INDArray mask; + protected int layerIndex; //For Layer.get/setIndex() + protected transient Solver solver; //Used to call optimizers during backprop + //Workspaces for CUDNN. Pass to LayerWorkspaceMgr for re-use in cudnn helpers + @Getter + protected transient Map helperWorkspaces = new HashMap<>(); + protected WorkspaceConfiguration WS_LAYER_WORKING_MEM_CONFIG; + protected WorkspaceConfiguration WS_LAYER_ACT_X_CONFIG; - /** - * Workspace for working memory for a single layer: forward pass and backward pass - * Note that this is opened/closed once per op (activate/backpropGradient call) - */ - protected static final String WS_LAYER_WORKING_MEM = "WS_LAYER_WORKING_MEM"; - /** - * Workspace for storing all layers' activations - used only to store activations (layer inputs) as part of backprop - * Not used for inference - */ - protected static final String WS_ALL_LAYERS_ACT = "WS_ALL_LAYERS_ACT"; - /** - * Next 2 workspaces: used for: - * (a) Inference: holds activations for one layer only - * (b) Backprop: holds activation gradients for one layer only - * In both cases, they are opened and closed on every second layer - */ - protected static final String WS_LAYER_ACT_1 = "WS_LAYER_ACT_1"; - protected static final String WS_LAYER_ACT_2 = "WS_LAYER_ACT_2"; + public MultiLayerNetwork(MultiLayerConfiguration conf) { + this.layerWiseConfigurations = conf; + this.defaultConfiguration = conf.getConf(0).clone(); - /** - * Workspace for output methods that use OutputAdapter - */ - protected static final String WS_OUTPUT_MEM = "WS_OUTPUT_MEM"; + //Working memory: should learn over course of: (a) full forward pass, and (b) full backward pass + //Working memory should be opened once per layer and once per preprocessor, for each of forward and backward passes + int numWorkingMem = 2 * (layerWiseConfigurations.getConfs().size() + + layerWiseConfigurations.getInputPreProcessors().size()); + WS_LAYER_WORKING_MEM_CONFIG = getLayerWorkingMemWSConfig(numWorkingMem); + WS_LAYER_ACT_X_CONFIG = getLayerActivationWSConfig(layerWiseConfigurations.getConfs().size()); + } - /** - * Workspace for working memory in RNNs - opened and closed once per RNN time step - */ - protected static final String WS_RNN_LOOP_WORKING_MEM = "WS_RNN_LOOP_WORKING_MEM"; + /** + * Initialize the network based on the configuration (a MultiLayerConfiguration in JSON format) + * and parameters array + * + * @param conf the configuration json + * @param params the 
parameters for the network + */ + public MultiLayerNetwork(String conf, INDArray params) { + this(MultiLayerConfiguration.fromJson(conf)); + init(); + setParameters(params); + } + /** + * Initialize the network based on the configuration and parameters array + * + * @param conf the configuration + * @param params the parameters + */ + public MultiLayerNetwork(MultiLayerConfiguration conf, INDArray params) { + this(conf); + init(); + setParameters(params); + } - protected WorkspaceConfiguration WS_LAYER_WORKING_MEM_CONFIG; + protected static WorkspaceConfiguration getLayerWorkingMemWSConfig(int numWorkingMemCycles) { + return WorkspaceConfiguration.builder() + .initialSize(0) + .overallocationLimit(0.02) + .policyLearning(LearningPolicy.OVER_TIME) + .cyclesBeforeInitialization(numWorkingMemCycles) + .policyReset(ResetPolicy.BLOCK_LEFT) + .policySpill(SpillPolicy.REALLOCATE) + .policyAllocation(AllocationPolicy.OVERALLOCATE) + .build(); + } - protected static final WorkspaceConfiguration WS_ALL_LAYERS_ACT_CONFIG = WorkspaceConfiguration.builder() - .initialSize(0) - .overallocationLimit(0.05) - .policyLearning(LearningPolicy.FIRST_LOOP) - .policyReset(ResetPolicy.BLOCK_LEFT) - .policySpill(SpillPolicy.REALLOCATE) - .policyAllocation(AllocationPolicy.OVERALLOCATE) + protected static WorkspaceConfiguration getLayerActivationWSConfig(int numLayers) { + //Activations memory: opened once per layer - for every second layer (preprocessors are within the loop). + //Technically we could set learning to numLayers / 2, but will set to numLayers for simplicity, and also to + // account for a backward pass + return WorkspaceConfiguration.builder() + .initialSize(0) + .overallocationLimit(0.02) + .policyLearning(LearningPolicy.OVER_TIME) + .cyclesBeforeInitialization(numLayers) + .policyReset(ResetPolicy.BLOCK_LEFT) + .policySpill(SpillPolicy.REALLOCATE) + .policyAllocation(AllocationPolicy.OVERALLOCATE) + .build(); + } + + /** + * Restore a MultiLayerNetwork to a file, saved using {@link #save(File)} or + * {@link ModelSerializer} + * + * @param f File to load the network from + * @param loadUpdater If true: load the updater if it is available (i.e., the state array for + * momentum/Adam/rmsprop etc) - use false if no further training is + * required, or true if further training will be undertaken + * @see ModelSerializer ModelSerializer for more details (and saving/loading via streams) + */ + public static MultiLayerNetwork load(File f, boolean loadUpdater) throws IOException { + return ModelSerializer.restoreMultiLayerNetwork(f, loadUpdater); + } + + /** + * This method sets specified CacheMode for all layers within network + * + * @param mode + */ + public void setCacheMode(CacheMode mode) { + if (mode == null) { + mode = CacheMode.NONE; + } + + for (Layer layer : layers) { + layer.setCacheMode(mode); + } + } + + /** + * Get the last ETL time. This in informational, and is the amount of time in milliseconds that + * was required to obtain the last DataSet/MultiDataSet during fitting. A value consistently above + * 0 may indicate a data feeding bottleneck, or no asynchronous data prefetching (async prefetch + * is enabled by default) + * + * @return The last ETL time in milliseconds, if avaliable (or 0 if not) + */ + public long getLastEtlTime() { + Long time = lastEtlTime.get(); + return time == null ? 0L : time; + } + + /** + * Set the last ETL time in milliseconds, for informational/reporting purposes. Generally used + * internally. 
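A minimal usage sketch for the restore and informational methods above, assuming the usual java.io.File and CacheMode imports, a model previously saved to the hypothetical file model.zip, and an enclosing method that declares IOException:

    // Restore a saved network (updater included) and query informational state.
    MultiLayerNetwork net = MultiLayerNetwork.load(new File("model.zip"), true);
    net.setCacheMode(CacheMode.NONE);      // null would also be mapped to NONE by the setter
    long lastEtlMs = net.getLastEtlTime(); // 0 until a fit(...) call has recorded an ETL time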
+ * + * @param time ETL time + */ + public void setLastEtlTime(long time) { + lastEtlTime.set(time); + } + + protected void intializeConfigurations() { + if (layerWiseConfigurations == null) { + layerWiseConfigurations = new MultiLayerConfiguration.Builder().build(); + } + + if (layers == null) { + layers = new Layer[getnLayers()]; + } + + if (defaultConfiguration == null) { + defaultConfiguration = new NeuralNetConfiguration.Builder().build(); + } + } + + /** + * Perform layerwise pretraining for one epoch - see {@link #pretrain(DataSetIterator, int)} + */ + public void pretrain(DataSetIterator iter) { + pretrain(iter, 1); + } + + /** + * Perform layerwise unsupervised training on all pre-trainable layers in the network (VAEs, + * Autoencoders, etc), for the specified number of epochs each. For example, if numEpochs=3, then + * layer 0 will be fit for 3 epochs, followed by layer 1 for 3 epochs, and so on.
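A minimal sketch of the layerwise pretraining entry points just described, assuming an initialized MultiLayerNetwork net whose layer 0 is pretrainable (e.g. an autoencoder or VAE) and a resettable DataSetIterator trainData:

    // Unsupervised pretraining of a single layer for several epochs; a no-op for non-pretrainable layers.
    net.pretrainLayer(0, trainData, 3);
    // Or pretrain every pretrainable layer in sequence, 3 epochs each:
    net.pretrain(trainData, 3);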
Note that + * pretraining will be performed on one layer after the other. To perform unsupervised training on + * a single layer, use {@link #pretrainLayer(int, DataSetIterator)} + * + * @param iter Training data + */ + public void pretrain(DataSetIterator iter, int numEpochs) { + if (flattenedGradients == null) { + initGradientsView(); + } + + for (int i = 0; i < getnLayers(); i++) { + pretrainLayer(i, iter, numEpochs); + } + } + + /** + * Fit for one epoch - see {@link #pretrainLayer(int, DataSetIterator, int)} + */ + public void pretrainLayer(int layerIdx, DataSetIterator iter) { + pretrainLayer(layerIdx, iter, 1); + } + + /** + * Perform layerwise unsupervised training on a single pre-trainable layer in the network (VAEs, + * Autoencoders, etc) for the specified number of epochs
If the specified layer index (0 to + * numLayers - 1) is not a pretrainable layer, this is a no-op. + * + * @param layerIdx Index of the layer to train (0 to numLayers-1) + * @param iter Training data + * @param numEpochs Number of epochs to fit the specified layer for + */ + public void pretrainLayer(int layerIdx, DataSetIterator iter, int numEpochs) { + Preconditions.checkState(numEpochs > 0, "Number of epochs (%s) must be a positive number", + numEpochs); + + if (flattenedGradients == null) { + initGradientsView(); + } + if (layerIdx >= layers.length) { + throw new IllegalArgumentException( + "Cannot pretrain layer: layerIdx (" + layerIdx + ") >= numLayers (" + layers.length + + ")"); + } + + Layer layer = layers[layerIdx]; + if (!layer.isPretrainLayer()) { + return; + } + + if (numEpochs > 1 && !iter.resetSupported()) { + throw new IllegalStateException("Cannot fit multiple epochs (" + numEpochs + + ") on an iterator that doesn't support resetting"); + } + + if (!iter.hasNext() && iter.resetSupported()) { + iter.reset(); + } + + log.info( + "Starting unsupervised training on layer " + layerIdx + " for " + numEpochs + " epochs"); + for (int i = 0; i < numEpochs; i++) { + if (i > 0) { + iter.reset(); + } + + while (iter.hasNext()) { + DataSet next = iter.next(); + input = next.getFeatures(); + pretrainLayer(layerIdx, input); + } + } + + int ec = getLayer(layerIdx).conf().getEpochCount() + 1; + getLayer(layerIdx).conf().setEpochCount(ec); + } + + /** + * Perform layerwise unsupervised training on a single pre-trainable layer in the network (VAEs, + * Autoencoders, etc)
If the specified layer index (0 to numLayers - 1) is not a pretrainable + * layer, this is a no-op. + * + * @param layerIdx Index of the layer to train (0 to numLayers-1) + * @param features Training data array + */ + public void pretrainLayer(int layerIdx, INDArray features) { + setInput(features); + setLayerMaskArrays(null, null); + + if (flattenedGradients == null) { + initGradientsView(); + } + if (layerIdx >= layers.length) { + throw new IllegalArgumentException( + "Cannot pretrain layer: layerIdx (" + layerIdx + ") >= numLayers (" + layers.length + + ")"); + } + + LayerWorkspaceMgr workspaceMgr; + if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); + } else { + workspaceMgr = LayerWorkspaceMgr.builder() + .defaultWorkspace(WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); + } + workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); + + Layer layer = layers[layerIdx]; + if (!layer.isPretrainLayer()) { + return; + } + + //Do forward pass to the layer to be pretrained + INDArray outputOfPrevLayer; + if (layerIdx == 0) { + outputOfPrevLayer = input; + } else { + //Yes, this part of training - but we'll do forward psas as inference mode when doing layerwise training + // to effectively freeze earlier layers and not apply dropout etc + outputOfPrevLayer = outputOfLayerDetached(false, FwdPassType.STANDARD, layerIndex - 1, + features, null, null, null); + } + + try (MemoryWorkspace ws = workspaceMgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { + if (layerWiseConfigurations.getInputPreProcess(layerIdx) != null) { + + if (input.size(0) > Integer.MAX_VALUE) { + throw new ND4JArraySizeException(); + } + outputOfPrevLayer = layerWiseConfigurations.getInputPreProcess(layerIdx) + .preProcess(outputOfPrevLayer, (int) input.size(0), + LayerWorkspaceMgr.noWorkspaces(helperWorkspaces)); + } + + layer.fit(outputOfPrevLayer, workspaceMgr); + } + } + + @Override + public int batchSize() { + //In 99+% of cases, the input and labels dimension 0 size should be identical + //The only real exceptions: space to batch, and batch to space layers + //In those cases, we should base it on the labels size, as this impacts gradient calculation + if (input.size(0) > Integer.MAX_VALUE || labels.size(0) > Integer.MAX_VALUE) { + throw new ND4JArraySizeException(); + } + return labels == null ? (int) input.size(0) : (int) labels.size(0); + } + + @Override + public NeuralNetConfiguration conf() { + return defaultConfiguration; + } + + @Override + public void setConf(NeuralNetConfiguration conf) { + throw new UnsupportedOperationException(); + } + + @Override + public INDArray input() { + return input; + } + + @Override + public ConvexOptimizer getOptimizer() { + return solver.getOptimizer(); + } + + /** + * Get one parameter array for the network.
In MultiLayerNetwork, parameters are keyed like + * "0_W" and "0_b" to mean "weights of layer index 0" and "biases of layer index 0" respectively. + * Numbers increment sequentially, and the suffixes ("W", "b" etc) depend on the layer type, and + * are defined in the relevant parameter initializers for each layer.
Note that the returned + * INDArrays are views of the underlying network parameters, so modifications of the returned + * arrays will impact the parameters of the network. + * + * @param param the key of the parameter + * @return The specified parameter array for the network + * @see #paramTable() paramTable() method, for a map of all parameters + */ + @Override + public INDArray getParam(String param) { + //Get params for MultiLayerNetwork sub layers. + int idx = param.indexOf('_'); + if (idx == -1) { + throw new IllegalStateException( + "Invalid param key: does not have layer separator: \"" + param + "\""); + } + int layerIdx = Integer.parseInt(param.substring(0, idx)); + String newKey = param.substring(idx + 1); + + return layers[layerIdx].getParam(newKey); + } + + /** + * Return a map of all parameters in the network. Parameter names are as described in + * {@link #getParam(String)}. As per {@link #getParam(String)} the returned arrays are views - + * modifications to these will impact the underlying network parameters + * + * @return A map of all parameters in the network + */ + @Override + public Map paramTable() { + return paramTable(false); + } + + /** + * Returns a map of all parameters in the network as per {@link #paramTable()}.
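A minimal sketch of the parameter keying described above ("<layerIndex>_<paramName>"), assuming an initialized MultiLayerNetwork net whose layer 0 has weight and bias parameters W and b:

    INDArray w0 = net.getParam("0_W");   // weights of layer 0 (a view of the flattened parameters)
    INDArray b0 = net.getParam("0_b");   // biases of layer 0
    w0.muli(0.5);                        // in-place changes are visible to the network immediately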
Optionally + * (with backpropParamsOnly=true) only the 'backprop' parameters are returned - that is, any + * parameters involved only in unsupervised layerwise pretraining not standard inference/backprop + * are excluded from the returned list. + * + * @param backpropParamsOnly If true, return backprop params only. If false: return all params + * @return Parameters for the network + */ + public Map paramTable(boolean backpropParamsOnly) { + //Get all parameters from all layers + Map allParams = new LinkedHashMap<>(); + for (int i = 0; i < layers.length; i++) { + Map paramMap = layers[i].paramTable(backpropParamsOnly); + for (Map.Entry entry : paramMap.entrySet()) { + String newKey = i + "_" + entry.getKey(); + allParams.put(newKey, entry.getValue()); + } + } + return allParams; + } + + /** + * Intended for internal use + */ + @Override + public boolean updaterDivideByMinibatch(String paramName) { + int idx = paramName.indexOf('_'); + int layerIdx = Integer.parseInt(paramName.substring(0, idx)); + String subName = paramName.substring(idx + 1); + return getLayer(layerIdx).updaterDivideByMinibatch(subName); + } + + /** + * Set the parameters of the netowrk. Note that the parameter keys must match the format as + * described in {@link #getParam(String)} and {@link #paramTable()}. Note that the values of the + * parameters used as an argument to this method are copied - i.e., it is safe to later + * modify/reuse the values in the provided paramTable without this impacting the network. + * + * @param paramTable Parameters to set + */ + @Override + public void setParamTable(Map paramTable) { + Map currParamTable = paramTable(); + if (!currParamTable.keySet().equals(paramTable.keySet())) { + throw new IllegalArgumentException( + "Cannot set param table: parameter keys do not match.\n" + "Current: " + + currParamTable.keySet() + "\nTo set: " + paramTable.keySet()); + } + + for (String s : paramTable.keySet()) { + INDArray curr = currParamTable.get(s); + INDArray toSet = paramTable.get(s); + if (!Arrays.equals(curr.shape(), toSet.shape())) { + throw new IllegalArgumentException( + "Cannot set parameter table: parameter \"" + s + "\" shapes " + + "do not match. Current = " + Arrays.toString(curr.shape()) + ", to set = " + + Arrays.toString(toSet.shape())); + } + } + + //Now that we've checked ALL params (to avoid leaving net in half-modified state) + for (String s : paramTable.keySet()) { + INDArray curr = currParamTable.get(s); + INDArray toSet = paramTable.get(s); + curr.assign(toSet); + } + } + + /** + * Set the values of a single parameter. See {@link #setParamTable(Map)} and + * {@link #getParam(String)} for more details. + * + * @param key the key of the parameter to set + * @param val the new values for the parameter + */ + @Override + public void setParam(String key, INDArray val) { + //Set params for MultiLayerNetwork sub layers. + int idx = key.indexOf('_'); + if (idx == -1) { + throw new IllegalStateException( + "Invalid param key: not have layer separator: \"" + key + "\""); + } + int layerIdx = Integer.parseInt(key.substring(0, idx)); + String newKey = key.substring(idx + 1); + + layers[layerIdx].setParam(newKey, val); + } + + /** + * Get the configuration for the network + * + * @return Network configuration + */ + public MultiLayerConfiguration getLayerWiseConfigurations() { + return layerWiseConfigurations; + } + + /** + * This method is intended for internal/developer use only. 
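A minimal sketch of copying parameters between two networks via the table-based accessors above, assuming source and target are initialized MultiLayerNetworks built from the same configuration:

    // setParamTable(...) copies values, so later changes to the source map do not affect target.
    java.util.Map<String, INDArray> params = source.paramTable();
    target.setParamTable(params);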
+ */ + public void setLayerWiseConfigurations(MultiLayerConfiguration layerWiseConfigurations) { + this.layerWiseConfigurations = layerWiseConfigurations; + } + + /** + * Initialize the MultiLayerNetwork. This should be called once before the network is used. This + * is functionally equivalent to calling {@code init(null, false)}. + * + * @see MultiLayerNetwork#init(INDArray, boolean) + */ + public void init() { + init(null, false); + } + + /** + * Initialize the MultiLayerNetwork, optionally with an existing parameters array. If an existing + * parameters array is specified, it will be used (and the values will not be modified) in the + * network; if no parameters array is specified, parameters will be initialized randomly according + * to the network configuration. + * + * @param parameters Network parameter. May be null. If null: randomly initialize. + * @param cloneParametersArray Whether the parameter array (if any) should be cloned, or used + * directly + */ + public void init(INDArray parameters, boolean cloneParametersArray) { + if (layerWiseConfigurations == null || layers == null) { + intializeConfigurations(); + } + if (initCalled) { + return; + } + + DataType netDtype = getLayerWiseConfigurations().getDataType(); + if (parameters != null && parameters.dataType() != netDtype) { + Preconditions.checkState(parameters.rank() == 2 && parameters.size(0) == 1, + "Invalid parameters array: should be rank 2 with shape [1,numParams]. Got %ndShape", + parameters); + if (cloneParametersArray) { + try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + parameters = parameters.castTo(netDtype); + } + } else { + throw new IllegalStateException( + "Error initializing network: Network datatype is set to " + netDtype + + " but provided array has datatype " + parameters.dataType() + + " with cloneParametersArray argument" + + " set to false. 
Cannot initialize net with specified datatype array if that array does not match network datatype"); + } + } + + if (layerMap == null) { + layerMap = new LinkedHashMap<>(); + } + + if (layerWiseConfigurations.getTrainingWorkspaceMode() == null) { + layerWiseConfigurations.setTrainingWorkspaceMode(WorkspaceMode.NONE); + } + + if (layerWiseConfigurations.getInferenceWorkspaceMode() == null) { + layerWiseConfigurations.setInferenceWorkspaceMode(WorkspaceMode.NONE); + } + + if (layerWiseConfigurations.getCacheMode() == null) { + layerWiseConfigurations.setCacheMode(CacheMode.NONE); + } + + OneTimeLogger.info(log, + "Starting MultiLayerNetwork with WorkspaceModes set to [training: {}; inference: {}], cacheMode set to [{}]", + layerWiseConfigurations.getTrainingWorkspaceMode(), + layerWiseConfigurations.getInferenceWorkspaceMode(), + layerWiseConfigurations.getCacheMode()); + + int nLayers = getnLayers(); + + if (nLayers < 1) { + throw new IllegalStateException("Unable to create network: number of layers is less than 1"); + } + + if (this.layers == null || this.layers[0] == null) { + if (this.layers == null) { + this.layers = new Layer[nLayers]; + } + + //First: Work out total length of params + long paramLength = 0; + val nParamsPerLayer = new long[nLayers]; + for (int i = 0; i < nLayers; i++) { + NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i); + conf.getLayer().setDataType(netDtype); + nParamsPerLayer[i] = conf.getLayer().initializer().numParams(conf); + paramLength += nParamsPerLayer[i]; + } + + //Create parameters array, if required + boolean initializeParams; + if (parameters != null) { + if (!parameters.isRowVectorOrScalar()) { + throw new IllegalArgumentException("Invalid parameters: should be a row vector"); + } + if (parameters.length() != paramLength) { + throw new IllegalArgumentException("Invalid parameters: expected length " + paramLength + + ", got length " + parameters.length()); + } + + if (cloneParametersArray) { + flattenedParams = parameters.dup(); + } else { + flattenedParams = parameters; + } + + initializeParams = false; + } else if (paramLength > 0) { + flattenedParams = Nd4j.create(netDtype, 1, paramLength); + initializeParams = true; + } else { + //Edge case: 0 params in network + flattenedParams = null; + initializeParams = false; + } + + //Set RNG seed, for repeatability between initializations when set + if (initializeParams) { + Nd4j.getRandom().setSeed(getDefaultConfiguration().getSeed()); + } + + // construct multi-layer + long paramCountSoFar = 0; + for (int i = 0; i < nLayers; i++) { + INDArray paramsView; + if (nParamsPerLayer[i] > 0) { + paramsView = flattenedParams.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(paramCountSoFar, paramCountSoFar + nParamsPerLayer[i])); + } else { + paramsView = null; + } + paramCountSoFar += nParamsPerLayer[i]; + + NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i); + layers[i] = conf.getLayer() + .instantiate(conf, trainingListeners, i, paramsView, initializeParams, netDtype); + layerMap.put(conf.getLayer().getLayerName(), layers[i]); + } + initCalled = true; + } + + //Set parameters in MultiLayerNetwork.defaultConfiguration for later use in BaseOptimizer.setupSearchState() etc + defaultConfiguration.clearVariables(); + List variables = defaultConfiguration.variables(false); + for (int i = 0; i < layers.length; i++) { + if (layers[i] == null) { + throw new IllegalStateException( + "Encountered null layer during initialization for layer " + i + + ": " + 
layerWiseConfigurations.getConf(i).getLayer().getClass().getSimpleName() + + " initialization " + + "returned null layer?"); + } + + for (String s : layers[i].conf().variables()) { + variables.add(i + "_" + s); + } + } + + // now we init solver & optimizer + if (solver == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) .build(); - - protected WorkspaceConfiguration WS_LAYER_ACT_X_CONFIG; - - protected static final WorkspaceConfiguration WS_RNN_LOOP_WORKING_MEM_CONFIG = WorkspaceConfiguration.builder() - .initialSize(0).overallocationLimit(0.05).policyReset(ResetPolicy.BLOCK_LEFT) - .policyAllocation(AllocationPolicy.OVERALLOCATE).policySpill(SpillPolicy.REALLOCATE) - .policyLearning(LearningPolicy.FIRST_LOOP).build(); - - - public MultiLayerNetwork(MultiLayerConfiguration conf) { - this.layerWiseConfigurations = conf; - this.defaultConfiguration = conf.getConf(0).clone(); - - //Working memory: should learn over course of: (a) full forward pass, and (b) full backward pass - //Working memory should be opened once per layer and once per preprocessor, for each of forward and backward passes - int numWorkingMem = 2 * (layerWiseConfigurations.getConfs().size() + layerWiseConfigurations.getInputPreProcessors().size()); - WS_LAYER_WORKING_MEM_CONFIG = getLayerWorkingMemWSConfig(numWorkingMem); - WS_LAYER_ACT_X_CONFIG = getLayerActivationWSConfig(layerWiseConfigurations.getConfs().size()); + solver.initOptimizer(); + } } - protected static WorkspaceConfiguration getLayerWorkingMemWSConfig(int numWorkingMemCycles){ - return WorkspaceConfiguration.builder() - .initialSize(0) - .overallocationLimit(0.02) - .policyLearning(LearningPolicy.OVER_TIME) - .cyclesBeforeInitialization(numWorkingMemCycles) - .policyReset(ResetPolicy.BLOCK_LEFT) - .policySpill(SpillPolicy.REALLOCATE) - .policyAllocation(AllocationPolicy.OVERALLOCATE) - .build(); + //Mark that input modification is allowed. + //TODO When is it safe to NOT skip the very first layer? It's not always safe... + // For example dropout + iterating over List that is used for multiple epochs... + for (int i = 1; i < layers.length; i++) { + layers[i].allowInputModification(true); } - protected static WorkspaceConfiguration getLayerActivationWSConfig(int numLayers){ - //Activations memory: opened once per layer - for every second layer (preprocessors are within the loop). - //Technically we could set learning to numLayers / 2, but will set to numLayers for simplicity, and also to - // account for a backward pass - return WorkspaceConfiguration.builder() - .initialSize(0) - .overallocationLimit(0.02) - .policyLearning(LearningPolicy.OVER_TIME) - .cyclesBeforeInitialization(numLayers) - .policyReset(ResetPolicy.BLOCK_LEFT) - .policySpill(SpillPolicy.REALLOCATE) - .policyAllocation(AllocationPolicy.OVERALLOCATE) - .build(); + synchronizeIterEpochCounts(); + } + + /** + * This method allows you to specificy GradientsAccumulator instance to be used with this + * model
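A minimal sketch of the construction and initialization paths above, assuming conf is a valid MultiLayerConfiguration:

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();   // random parameters, per the configuration's seed and initializers
    // Rebuild an identical network from the JSON form of the configuration plus the flattened parameters:
    MultiLayerNetwork copy = new MultiLayerNetwork(conf.toJson(), net.params());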
+ *
+ * PLEASE NOTE: Do not use this method unless you understand how to use GradientsAccumulator &
+ * updates sharing.
PLEASE NOTE: Do not use this method on standalone model + * + * @param accumulator Gradient accumulator to use for the network + */ + public void setGradientsAccumulator(GradientsAccumulator accumulator) { + if (!isInitCalled()) { + init(); } - /** - * This method sets specified CacheMode for all layers within network - * - * @param mode - */ - public void setCacheMode(CacheMode mode) { - if (mode == null) - mode = CacheMode.NONE; - - for (Layer layer : layers) { - layer.setCacheMode(mode); - } + if (solver == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + .build(); + } } - /** - * Set the last ETL time in milliseconds, for informational/reporting purposes. Generally used internally. - * @param time ETL time - */ - public void setLastEtlTime(long time) { - lastEtlTime.set(time); - } + solver.getOptimizer().setGradientsAccumulator(accumulator); + } - /** - * Get the last ETL time. This in informational, and is the amount of time in milliseconds that was required - * to obtain the last DataSet/MultiDataSet during fitting. - * A value consistently above 0 may indicate a data feeding bottleneck, or no asynchronous data prefetching (async - * prefetch is enabled by default) - * @return The last ETL time in milliseconds, if avaliable (or 0 if not) - */ - public long getLastEtlTime() { - Long time = lastEtlTime.get(); - return time == null ? 0L : time; - } + public boolean isInitCalled() { + return initCalled; + } - /** - * Initialize the network based on the configuration (a MultiLayerConfiguration in JSON format) and parameters array - * - * @param conf the configuration json - * @param params the parameters for the network - */ - public MultiLayerNetwork(String conf, INDArray params) { - this(MultiLayerConfiguration.fromJson(conf)); + /** + * This method: initializes the flattened gradients array (used in backprop) and sets the + * appropriate subset in all layers. As a general rule, this shouldn't ever need to be called + * manually when doing training via fit(DataSet) or fit(DataSetIterator) + */ + public void initGradientsView() { + try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + if (layers == null) { init(); - setParameters(params); + } + + int nLayers = layers.length; + + //First: Work out total length of params + long paramLength = 0; + val nParamsPerLayer = new long[nLayers]; + for (int i = 0; i < nLayers; i++) { + NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i); + nParamsPerLayer[i] = conf.getLayer().initializer().numParams(conf); + paramLength += nParamsPerLayer[i]; + } + + if (paramLength > 0) { + flattenedGradients = Nd4j.create(flattenedParams.dataType(), new long[]{1, paramLength}, + 'f'); //No need to initialize, as each layer will do it each iteration anyway + } + + long paramsSoFar = 0; + for (int i = 0; i < layers.length; i++) { + if (nParamsPerLayer[i] == 0) { + continue; //This layer doesn't have any parameters... 
+ } + INDArray thisLayerGradView = flattenedGradients.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(paramsSoFar, paramsSoFar + nParamsPerLayer[i])); + layers[i].setBackpropGradientsViewArray(thisLayerGradView); + paramsSoFar += nParamsPerLayer[i]; + } + } + } + + protected INDArray activationFromPrevLayer(int curr, INDArray input, boolean training, + LayerWorkspaceMgr mgr) { + if (getLayerWiseConfigurations().getInputPreProcess(curr) != null) { + input = getLayerWiseConfigurations().getInputPreProcess(curr) + .preProcess(input, getInputMiniBatchSize(), mgr); } + INDArray ret = layers[curr].activate(input, training, mgr); + return ret; + } - /** - * Initialize the network based on the configuration and parameters array - * - * @param conf the configuration - * @param params the parameters - */ - public MultiLayerNetwork(MultiLayerConfiguration conf, INDArray params) { - this(conf); - init(); - setParameters(params); + /** + * Calculate activation for few layers at once. Suitable for autoencoder partial activation. + *
+ * In example: in 10-layer deep autoencoder, layers 0 - 4 inclusive are used for encoding part, + * and layers 5-9 inclusive are used for decoding part. + * + * @param from first layer to be activated, inclusive + * @param to last layer to be activated, inclusive + * @return the activation from the last layer + */ + public INDArray activateSelectedLayers(int from, int to, INDArray input) { + if (input == null) { + throw new IllegalStateException("Unable to perform activation; no input found"); + } + if (from < 0 || from >= layers.length || from >= to) { + throw new IllegalStateException("Unable to perform activation; FROM is out of layer space"); + } + if (to < 1 || to >= layers.length) { + throw new IllegalStateException("Unable to perform activation; TO is out of layer space"); } + try { + LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces(helperWorkspaces); //TODO - protected void intializeConfigurations() { - if (layerWiseConfigurations == null) - layerWiseConfigurations = new MultiLayerConfiguration.Builder().build(); - - if (layers == null) - layers = new Layer[getnLayers()]; - - if (defaultConfiguration == null) - defaultConfiguration = new NeuralNetConfiguration.Builder().build(); + INDArray res = input; + for (int l = from; l <= to; l++) { + res = this.activationFromPrevLayer(l, res, false, mgr); + } + return res; + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; } + } + /** + * Compute all layer activations, from input to output of the output layer. Note that the input is + * included in the list: thus feedForward(in,train).get(0) is the inputs, .get(1) is the + * activations of layer 0, and so on. + * + * @param train Training: if true, perform forward pass/inference at training time. Usually, + * inference is performed with train = false. This impacts whether dropout etc is + * applied or not. + * @return The list of activations for each layer, including the input + */ + public List feedForward(INDArray input, boolean train) { + setInput(input); + return feedForward(train); + } - /** - * Perform layerwise pretraining for one epoch - see {@link #pretrain(DataSetIterator, int)} - */ - public void pretrain(DataSetIterator iter) { - pretrain(iter, 1); + /** + * Compute activations from input to output of the output layer. As per + * {@link #feedForward(INDArray, boolean)} but using the inputs that have previously been set + * using {@link #setInput(INDArray)} + * + * @return the list of activations for each layer + */ + public List feedForward(boolean train) { + try { + return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layers.length - 1, + input, mask, null, true); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; } + } - /** - * Perform layerwise unsupervised training on all pre-trainable layers in the network (VAEs, Autoencoders, etc), for the specified - * number of epochs each. For example, if numEpochs=3, then layer 0 will be fit for 3 epochs, followed by layer 1 - * for 3 epochs, and so on.
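A minimal sketch of activateSelectedLayers(...) for the 10-layer autoencoder example in the javadoc above, assuming an initialized MultiLayerNetwork net and an INDArray code shaped like the input of layer 5:

    // Run only the decoder half (layers 5..9, both inclusive) on an encoded representation.
    INDArray decoded = net.activateSelectedLayers(5, 9, code);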
- * Note that pretraining will be performed on one layer after the other. To perform unsupervised training on a single layer, - * use {@link #pretrainLayer(int, DataSetIterator)} - * - * @param iter Training data - */ - public void pretrain(DataSetIterator iter, int numEpochs){ - if (flattenedGradients == null) { - initGradientsView(); - } - - for (int i = 0; i < getnLayers(); i++) { - pretrainLayer(i, iter, numEpochs); - } + /** + * Perform feed-forward, optionally (not) clearing the layer input arrays.
Note: when using + * clearInputs=false, there can be some performance and memory overhead: this is because the + * arrays are defined outside of workspaces (which are enabled by default) - otherwise, + * old/invalidated arrays could still be accessed after calling this method. Consequently: Don't + * use clearInputs=false unless you have a use case that requires them to remain after + * feed-forward has been completed + * + * @param train training mode (true) or test mode (false) + * @param clearInputs If false: don't clear the layer inputs + * @return Activations from feed-forward + */ + public List feedForward(boolean train, boolean clearInputs) { + try { + return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layers.length - 1, + input, mask, null, clearInputs); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; } + } - /** - * Fit for one epoch - see {@link #pretrainLayer(int, DataSetIterator, int)} - */ - public void pretrainLayer(int layerIdx, DataSetIterator iter) { - pretrainLayer(layerIdx, iter, 1); + /** + * Compute the activations from the input to the specified layer.
To compute activations for + * all layers, use feedForward(...) methods
Note: output list includes the original input. So + * list.get(0) is always the original input, and list.get(i+1) is the activations of the ith + * layer. + * + * @param layerNum Index of the last layer to calculate activations for. Layers are zero-indexed. + * feedForwardToLayer(i,input) will return the activations for layers 0..i + * (inclusive) + * @param input Input to the network + * @return list of activations. + */ + public List feedForwardToLayer(int layerNum, INDArray input) { + try { + return ffToLayerActivationsDetached(false, FwdPassType.STANDARD, false, layerNum, input, mask, + null, true); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; } + } - /** - * Perform layerwise unsupervised training on a single pre-trainable layer in the network (VAEs, Autoencoders, etc) - * for the specified number of epochs
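A minimal sketch of the feed-forward activation helpers above, assuming an initialized MultiLayerNetwork net and an input INDArray features:

    // Index 0 is the input itself; index i+1 holds the activations of layer i.
    java.util.List<INDArray> acts = net.feedForward(features, false);   // false = inference mode
    INDArray networkOutput = acts.get(acts.size() - 1);
    // Stop early: activations of layers 0..2 only (plus the input at index 0).
    java.util.List<INDArray> firstThree = net.feedForwardToLayer(2, features);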
- * If the specified layer index (0 to numLayers - 1) is not a pretrainable layer, this is a no-op. - * - * @param layerIdx Index of the layer to train (0 to numLayers-1) - * @param iter Training data - * @param numEpochs Number of epochs to fit the specified layer for - */ - public void pretrainLayer(int layerIdx, DataSetIterator iter, int numEpochs) { - Preconditions.checkState(numEpochs > 0, "Number of epochs (%s) must be a positive number", numEpochs); - - if (flattenedGradients == null) { - initGradientsView(); - } - if (layerIdx >= layers.length) { - throw new IllegalArgumentException( - "Cannot pretrain layer: layerIdx (" + layerIdx + ") >= numLayers (" + layers.length + ")"); - } - - Layer layer = layers[layerIdx]; - if (!layer.isPretrainLayer()) - return; - - if(numEpochs > 1 && !iter.resetSupported()) - throw new IllegalStateException("Cannot fit multiple epochs (" + numEpochs + ") on an iterator that doesn't support resetting"); - - if (!iter.hasNext() && iter.resetSupported()) { - iter.reset(); - } - - log.info("Starting unsupervised training on layer " + layerIdx + " for " + numEpochs + " epochs"); - for(int i=0; i 0) - iter.reset(); - - while (iter.hasNext()) { - DataSet next = iter.next(); - input = next.getFeatures(); - pretrainLayer(layerIdx, input); - } - } - - int ec = getLayer(layerIdx).conf().getEpochCount() + 1; - getLayer(layerIdx).conf().setEpochCount(ec); + /** + * Compute the activations from the input to the specified layer.
To compute activations for + * all layers, use feedForward(...) methods
Note: output list includes the original input. So + * list.get(0) is always the original input, and list.get(i+1) is the activations of the ith + * layer. + * + * @param layerNum Index of the last layer to calculate activations for. Layers are zero-indexed. + * feedForwardToLayer(i,input) will return the activations for layers 0..i + * (inclusive) + * @param input Input to the network + * @param train true for training, false for test (i.e., false if using network after + * training) + * @return list of activations. + */ + public List feedForwardToLayer(int layerNum, INDArray input, boolean train) { + try { + int layerVertexIdx = layers[layerNum].getIndex(); + return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layerVertexIdx, input, + mask, null, true); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; } + } - /** - * Perform layerwise unsupervised training on a single pre-trainable layer in the network (VAEs, Autoencoders, etc)
- * If the specified layer index (0 to numLayers - 1) is not a pretrainable layer, this is a no-op. - * - * @param layerIdx Index of the layer to train (0 to numLayers-1) - * @param features Training data array - */ - public void pretrainLayer(int layerIdx, INDArray features) { - setInput(features); - setLayerMaskArrays(null, null); + /** + * Compute the activations from the input to the specified layer, using the currently set input + * for the network.
To compute activations for all layers, use feedForward(...) methods
+ * Note: output list includes the original input. So list.get(0) is always the original input, and + * list.get(i+1) is the activations of the ith layer. + * + * @param layerNum Index of the last layer to calculate activations for. Layers are zero-indexed. + * feedForwardToLayer(i,input) will return the activations for layers 0..i + * (inclusive) + * @param train true for training, false for test (i.e., false if using network after + * training) + * @return list of activations. + */ + public List feedForwardToLayer(int layerNum, boolean train) { + try { + return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layerNum, input, mask, + null, true); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; + } + } - if (flattenedGradients == null) { - initGradientsView(); - } - if (layerIdx >= layers.length) { - throw new IllegalArgumentException( - "Cannot pretrain layer: layerIdx (" + layerIdx + ") >= numLayers (" + layers.length + ")"); + protected void validateArrayWorkspaces(LayerWorkspaceMgr mgr, INDArray array, ArrayType arrayType, + int layerIdx, + boolean isPreprocessor, String op) { + try { + mgr.validateArrayLocation(arrayType, array, false, layerIdx > 0); + } catch (ND4JWorkspaceException e) { + String layerName = layers[layerIdx].conf().getLayer().getLayerName(); + String clazz; + if (isPreprocessor) { + clazz = layerWiseConfigurations.getInputPreProcess(layerIdx).getClass().getName(); + } else { + clazz = layers[layerIdx].getClass().getName(); + } + throw new IllegalStateException( + op + ": array (" + arrayType + ") workspace validation failed (" + + (isPreprocessor ? "preprocessor" : "layer ") + layerIdx + (layerName != null ? + " - layer name \"" + + layerName + "\"" : "") + " - class: " + clazz + + ") - array is defined in incorrect workspace", e); + } + } + + /** + * Feed-forward through the network - returning all array activations in a list, detached from any + * workspace. Note that no workspace should be active externally when calling this method (an + * exception will be thrown if a workspace is open externally) + * + * @param train Training mode (true) or test/inference mode (false) + * @param fwdPassType Type of forward pass to perform (STANDARD or + * RNN_ACTIVATE_WITH_STORED_STATE only) + * @param storeLastForTBPTT ONLY used if fwdPassType == + * FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE + * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use + * numLayers-1 + * @param input Input to the network + * @param fMask Feature mask array. May be null. + * @param lMask Label mask array. May be null. + * @param clearInputs Whether the layer inputs should be cleared + * @return List of activations (including the input), detached from any workspace + */ + protected synchronized List ffToLayerActivationsDetached(boolean train, + @NonNull FwdPassType fwdPassType, + boolean storeLastForTBPTT, int layerIndex, @NonNull INDArray input, + INDArray fMask, INDArray lMask, boolean clearInputs) { + setInput(input); + setLayerMaskArrays(fMask, lMask); + + //Verify that no workspace is open externally + WorkspaceUtils.assertNoWorkspacesOpen( + "Expected no workspace active in ffToLayerActivationsDetached"); + + LayerWorkspaceMgr workspaceMgr; + WorkspaceMode wsm = (train ? 
layerWiseConfigurations.getTrainingWorkspaceMode() + : layerWiseConfigurations.getInferenceWorkspaceMode()); + if (wsm == WorkspaceMode.NONE) { + workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); + } else { + workspaceMgr = LayerWorkspaceMgr.builder() + .noWorkspaceFor(ArrayType.ACTIVATIONS) + .with(ArrayType.INPUT, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); + + if (input.isAttached()) { + //Don't leverage out of async DataSetIterator workspaces + workspaceMgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); + } + + if (!clearInputs) { + workspaceMgr.setScopedOutFor(ArrayType.INPUT); + } + } + workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); + + List out = new ArrayList<>(); + out.add(workspaceMgr.leverageTo(ArrayType.INPUT, + input)); //Should be unnecessary (and no op), if layer is implemented correctly + + for (int i = 0; i <= layerIndex; i++) { + try (MemoryWorkspace wsFFWorking = workspaceMgr.notifyScopeEntered( + ArrayType.FF_WORKING_MEM)) { + if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { + input = getLayerWiseConfigurations().getInputPreProcess(i) + .preProcess(input, getInputMiniBatchSize(), workspaceMgr); + //Validation: Exception if invalid (bad preprocessor implementation) + validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, true, + "Feed forward to layer (inference)"); } - LayerWorkspaceMgr workspaceMgr; - if(layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ - workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); + if (fwdPassType == FwdPassType.STANDARD) { + input = layers[i].activate(input, train, workspaceMgr); + } else if (fwdPassType == FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE) { + if (layers[i] instanceof RecurrentLayer) { + input = ((RecurrentLayer) layers[i]).rnnActivateUsingStoredState(input, train, + storeLastForTBPTT, workspaceMgr); + } else if (layers[i] instanceof BaseWrapperLayer + && ((BaseWrapperLayer) layers[i]).getUnderlying() instanceof RecurrentLayer) { + RecurrentLayer rl = (RecurrentLayer) ((BaseWrapperLayer) layers[i]).getUnderlying(); + input = rl.rnnActivateUsingStoredState(input, train, storeLastForTBPTT, workspaceMgr); + } else if (layers[i] instanceof MultiLayerNetwork) { + List temp = ((MultiLayerNetwork) layers[i]).rnnActivateUsingStoredState(input, + train, storeLastForTBPTT); + input = temp.get(temp.size() - 1); + } else { + input = layers[i].activate(input, train, workspaceMgr); + } } else { - workspaceMgr = LayerWorkspaceMgr.builder() - .defaultWorkspace(WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + throw new IllegalStateException( + "Forward pass type not supported for this method: " + fwdPassType); } - workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); - Layer layer = layers[layerIdx]; - if (!layer.isPretrainLayer()) - return; + //Validation: Exception if invalid (bad layer implementation) + validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, false, + "Feed forward to layer (inference)"); - //Do forward pass to the layer to be pretrained - INDArray outputOfPrevLayer; - if(layerIdx == 0) { - outputOfPrevLayer = input; + out.add(input); + } + if (clearInputs) { + layers[i].clear(); + } + } + + return out; + } + 
+ /** + * Feed-forward through the network at training time - returning a list of all activations in a + * workspace (WS_ALL_LAYERS_ACT) if workspaces are enabled for training; or detached if no + * workspaces are used.
Note: if using workspaces for training, this method requires that + * WS_ALL_LAYERS_ACT is open externally.
If using NO workspaces, requires that no external + * workspace is open
Note that this method does NOT clear the inputs to each layer - instead, + * they are in the WS_ALL_LAYERS_ACT workspace for use in later backprop. + * + * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use + * numLayers-1 + * @param fwdPassType Type of forward pass to perform (STANDARD or + * RNN_ACTIVATE_WITH_STORED_STATE only) + * @param storeLastForTBPTT ONLY used if fwdPassType == + * FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE + * @param input Input to network + * @param fMask Feature mask array. May be null + * @param lMask Label mask aray. May be null. + * @return + */ + protected synchronized List ffToLayerActivationsInWs(int layerIndex, + @NonNull FwdPassType fwdPassType, boolean storeLastForTBPTT, + @NonNull INDArray input, INDArray fMask, INDArray lMask) { + setInput(input); + setLayerMaskArrays(fMask, lMask); + + LayerWorkspaceMgr workspaceMgr; + if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + WorkspaceUtils.assertNoWorkspacesOpen( + "Expected no workspace active in ffToLayerActivationsInWs when training workspace is set to NONE"); + workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); + } else { + workspaceMgr = LayerWorkspaceMgr.builder() + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); + + if (input.isAttached()) { + //Don't leverage out of async DataSetIterator workspaces + workspaceMgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); + } + + if (layerWiseConfigurations.getCacheMode() != CacheMode.NONE) { + //For now: store cache mode activations in activations workspace + workspaceMgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); + workspaceMgr.setWorkspace(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, + WS_LAYER_WORKING_MEM_CONFIG); + } + + WorkspaceUtils.assertOpenAndActive(WS_ALL_LAYERS_ACT, + "ffToLayerActivationsInWs method requires workspace WS_ALL_LAYERS_ACT to be open"); + } + workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); + + List out = new ArrayList<>(); + out.add(workspaceMgr.leverageTo(ArrayType.INPUT, input)); //Probably unnecessary usually + + boolean traceLog = log.isTraceEnabled(); + + for (int i = 0; i <= layerIndex; i++) { + try (MemoryWorkspace wsFFWorking = workspaceMgr.notifyScopeEntered( + ArrayType.FF_WORKING_MEM)) { + if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { + input = getLayerWiseConfigurations().getInputPreProcess(i) + .preProcess(input, getInputMiniBatchSize(), workspaceMgr); + //Validation: Exception if invalid (bad preprocessor implementation) + validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, true, + "Feed forward to layer (training)"); + } + + if (traceLog) { + log.trace("About to forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); + } + + if (fwdPassType == FwdPassType.STANDARD) { + input = layers[i].activate(input, true, workspaceMgr); + } else if (fwdPassType == FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE) { + if (layers[i] instanceof RecurrentLayer) { + input = ((RecurrentLayer) layers[i]).rnnActivateUsingStoredState(input, true, + storeLastForTBPTT, workspaceMgr); + } else if (layers[i] instanceof BaseWrapperLayer + && ((BaseWrapperLayer) layers[i]).getUnderlying() 
instanceof RecurrentLayer) { + RecurrentLayer rl = (RecurrentLayer) ((BaseWrapperLayer) layers[i]).getUnderlying(); + input = rl.rnnActivateUsingStoredState(input, true, storeLastForTBPTT, workspaceMgr); + } else if (layers[i] instanceof MultiLayerNetwork) { + List temp = ((MultiLayerNetwork) layers[i]).rnnActivateUsingStoredState(input, + true, storeLastForTBPTT); + input = temp.get(temp.size() - 1); + } else { + input = layers[i].activate(input, true, workspaceMgr); + } } else { - //Yes, this part of training - but we'll do forward psas as inference mode when doing layerwise training - // to effectively freeze earlier layers and not apply dropout etc - outputOfPrevLayer = outputOfLayerDetached(false, FwdPassType.STANDARD, layerIndex-1, features, null, null, null); + throw new IllegalStateException( + "FwdPassType not supported for this method: " + fwdPassType); } - try(MemoryWorkspace ws = workspaceMgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { - if (layerWiseConfigurations.getInputPreProcess(layerIdx) != null) { - - if (input.size(0) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - outputOfPrevLayer = layerWiseConfigurations.getInputPreProcess(layerIdx).preProcess(outputOfPrevLayer, (int) input.size(0), - LayerWorkspaceMgr.noWorkspaces(helperWorkspaces)); - } - - layer.fit(outputOfPrevLayer, workspaceMgr); - } - } - - @Override - public int batchSize() { - //In 99+% of cases, the input and labels dimension 0 size should be identical - //The only real exceptions: space to batch, and batch to space layers - //In those cases, we should base it on the labels size, as this impacts gradient calculation - if (input.size(0) > Integer.MAX_VALUE || labels.size(0) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - return labels == null ? (int) input.size(0) : (int)labels.size(0); - } - - @Override - public NeuralNetConfiguration conf() { - return defaultConfiguration; - } - - @Override - public void setConf(NeuralNetConfiguration conf) { - throw new UnsupportedOperationException(); - } - - @Override - public INDArray input() { - return input; - } - - @Override - public ConvexOptimizer getOptimizer() { - return solver.getOptimizer(); - } - - /** - * Get one parameter array for the network.
- * In MultiLayerNetwork, parameters are keyed like "0_W" and "0_b" to mean "weights of layer index 0" and "biases - * of layer index 0" respectively. Numbers increment sequentially, and the suffixes ("W", "b" etc) depend on the - * layer type, and are defined in the relevant parameter initializers for each layer.
- * Note that the returned INDArrays are views of the underlying network parameters, so modifications of the returned - * arrays will impact the parameters of the network. - * - * @param param the key of the parameter - * @return The specified parameter array for the network - * @see #paramTable() paramTable() method, for a map of all parameters - */ - @Override - public INDArray getParam(String param) { - //Get params for MultiLayerNetwork sub layers. - int idx = param.indexOf('_'); - if (idx == -1) - throw new IllegalStateException("Invalid param key: does not have layer separator: \"" + param + "\""); - int layerIdx = Integer.parseInt(param.substring(0, idx)); - String newKey = param.substring(idx + 1); - - return layers[layerIdx].getParam(newKey); - } - - /** - * Return a map of all parameters in the network. Parameter names are as described in {@link #getParam(String)}. - * As per {@link #getParam(String)} the returned arrays are views - modifications to these will impact - * the underlying network parameters - * @return A map of all parameters in the network - */ - @Override - public Map paramTable() { - return paramTable(false); - } - - /** - * Returns a map of all parameters in the network as per {@link #paramTable()}.
- * Optionally (with backpropParamsOnly=true) only the 'backprop' parameters are returned - that is, any parameters - * involved only in unsupervised layerwise pretraining not standard inference/backprop are excluded from the returned list. - * @param backpropParamsOnly If true, return backprop params only. If false: return all params - * @return Parameters for the network - */ - public Map paramTable(boolean backpropParamsOnly) { - //Get all parameters from all layers - Map allParams = new LinkedHashMap<>(); - for (int i = 0; i < layers.length; i++) { - Map paramMap = layers[i].paramTable(backpropParamsOnly); - for (Map.Entry entry : paramMap.entrySet()) { - String newKey = i + "_" + entry.getKey(); - allParams.put(newKey, entry.getValue()); - } - } - return allParams; - } - - /** - * Intended for internal use - */ - @Override - public boolean updaterDivideByMinibatch(String paramName) { - int idx = paramName.indexOf('_'); - int layerIdx = Integer.parseInt(paramName.substring(0, idx)); - String subName = paramName.substring(idx+1); - return getLayer(layerIdx).updaterDivideByMinibatch(subName); - } - - /** - * Set the parameters of the netowrk. Note that the parameter keys must match the format as described in {@link #getParam(String)} - * and {@link #paramTable()}. Note that the values of the parameters used as an argument to this method are copied - - * i.e., it is safe to later modify/reuse the values in the provided paramTable without this impacting the network. - * - * @param paramTable Parameters to set - */ - @Override - public void setParamTable(Map paramTable) { - Map currParamTable = paramTable(); - if (!currParamTable.keySet().equals(paramTable.keySet())) { - throw new IllegalArgumentException("Cannot set param table: parameter keys do not match.\n" + "Current: " - + currParamTable.keySet() + "\nTo set: " + paramTable.keySet()); + if (input == null) { + throw new IllegalStateException("Layer " + i + " returned null activations"); } - for (String s : paramTable.keySet()) { - INDArray curr = currParamTable.get(s); - INDArray toSet = paramTable.get(s); - if (!Arrays.equals(curr.shape(), toSet.shape())) { - throw new IllegalArgumentException("Cannot set parameter table: parameter \"" + s + "\" shapes " - + "do not match. Current = " + Arrays.toString(curr.shape()) + ", to set = " - + Arrays.toString(toSet.shape())); - } - } + //Validation: Exception if invalid (bad layer implementation) + validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, false, + "Feed forward to layer (training)"); + validateArrayWorkspaces(workspaceMgr, layers[i].input(), ArrayType.INPUT, i, false, + "Feed forward to layer (training)"); - //Now that we've checked ALL params (to avoid leaving net in half-modified state) - for (String s : paramTable.keySet()) { - INDArray curr = currParamTable.get(s); - INDArray toSet = paramTable.get(s); - curr.assign(toSet); + out.add(input); + + if (traceLog) { + log.trace("Completed forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); } + } } - /** - * Set the values of a single parameter. See {@link #setParamTable(Map)} and {@link #getParam(String)} for more - * details. - * @param key the key of the parameter to set - * @param val the new values for the parameter - */ - @Override - public void setParam(String key, INDArray val) { - //Set params for MultiLayerNetwork sub layers. 
- int idx = key.indexOf('_'); - if (idx == -1) - throw new IllegalStateException("Invalid param key: not have layer separator: \"" + key + "\""); - int layerIdx = Integer.parseInt(key.substring(0, idx)); - String newKey = key.substring(idx + 1); - - layers[layerIdx].setParam(newKey, val); - } - - /** - * Get the configuration for the network - * @return Network configuration - */ - public MultiLayerConfiguration getLayerWiseConfigurations() { - return layerWiseConfigurations; - } - - /** - * This method is intended for internal/developer use only. - */ - public void setLayerWiseConfigurations(MultiLayerConfiguration layerWiseConfigurations) { - this.layerWiseConfigurations = layerWiseConfigurations; - } - - /** - * Initialize the MultiLayerNetwork. This should be called once before the network is used. - * This is functionally equivalent to calling {@code init(null, false)}. - * @see MultiLayerNetwork#init(INDArray, boolean) - */ - public void init() { - init(null, false); - } - - /** - * Initialize the MultiLayerNetwork, optionally with an existing parameters array. - * If an existing parameters array is specified, it will be used (and the values will not be modified) in the network; - * if no parameters array is specified, parameters will be initialized randomly according to the network configuration. - * - * @param parameters Network parameter. May be null. If null: randomly initialize. - * @param cloneParametersArray Whether the parameter array (if any) should be cloned, or used directly - */ - public void init(INDArray parameters, boolean cloneParametersArray) { - if (layerWiseConfigurations == null || layers == null) - intializeConfigurations(); - if (initCalled) - return; - - DataType netDtype = getLayerWiseConfigurations().getDataType(); - if(parameters != null && parameters.dataType() != netDtype){ - Preconditions.checkState(parameters.rank() == 2 && parameters.size(0) == 1, "Invalid parameters array: should be rank 2 with shape [1,numParams]. Got %ndShape", parameters); - if(cloneParametersArray){ - try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - parameters = parameters.castTo(netDtype); - } - } else { - throw new IllegalStateException("Error initializing network: Network datatype is set to " + netDtype - + " but provided array has datatype " + parameters.dataType() + " with cloneParametersArray argument" + - " set to false. 
Cannot initialize net with specified datatype array if that array does not match network datatype"); - } - } - - - if (layerMap == null) - layerMap = new LinkedHashMap<>(); - - if (layerWiseConfigurations.getTrainingWorkspaceMode() == null) - layerWiseConfigurations.setTrainingWorkspaceMode(WorkspaceMode.NONE); - - if (layerWiseConfigurations.getInferenceWorkspaceMode() == null) - layerWiseConfigurations.setInferenceWorkspaceMode(WorkspaceMode.NONE); - - if (layerWiseConfigurations.getCacheMode() == null) - layerWiseConfigurations.setCacheMode(CacheMode.NONE); - - OneTimeLogger.info(log, "Starting MultiLayerNetwork with WorkspaceModes set to [training: {}; inference: {}], cacheMode set to [{}]", - layerWiseConfigurations.getTrainingWorkspaceMode(), - layerWiseConfigurations.getInferenceWorkspaceMode(), - layerWiseConfigurations.getCacheMode()); - - int nLayers = getnLayers(); - - if (nLayers < 1) - throw new IllegalStateException("Unable to create network: number of layers is less than 1"); - - if (this.layers == null || this.layers[0] == null) { - if (this.layers == null) - this.layers = new Layer[nLayers]; - - //First: Work out total length of params - long paramLength = 0; - val nParamsPerLayer = new long[nLayers]; - for (int i = 0; i < nLayers; i++) { - NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i); - conf.getLayer().setDataType(netDtype); - nParamsPerLayer[i] = conf.getLayer().initializer().numParams(conf); - paramLength += nParamsPerLayer[i]; - } - - //Create parameters array, if required - boolean initializeParams; - if (parameters != null) { - if (!parameters.isRowVectorOrScalar()) - throw new IllegalArgumentException("Invalid parameters: should be a row vector"); - if (parameters.length() != paramLength) - throw new IllegalArgumentException("Invalid parameters: expected length " + paramLength - + ", got length " + parameters.length()); - - if (cloneParametersArray) - flattenedParams = parameters.dup(); - else - flattenedParams = parameters; - - initializeParams = false; - } else if(paramLength > 0){ - flattenedParams = Nd4j.create(netDtype, 1, paramLength); - initializeParams = true; - } else { - //Edge case: 0 params in network - flattenedParams = null; - initializeParams = false; - } - - //Set RNG seed, for repeatability between initializations when set - if (initializeParams) { - Nd4j.getRandom().setSeed(getDefaultConfiguration().getSeed()); - } - - // construct multi-layer - long paramCountSoFar = 0; - for (int i = 0; i < nLayers; i++) { - INDArray paramsView; - if (nParamsPerLayer[i] > 0) { - paramsView = flattenedParams.get(NDArrayIndex.interval(0,0,true), - NDArrayIndex.interval(paramCountSoFar, paramCountSoFar + nParamsPerLayer[i])); - } else { - paramsView = null; - } - paramCountSoFar += nParamsPerLayer[i]; - - NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i); - layers[i] = conf.getLayer().instantiate(conf, trainingListeners, i, paramsView, initializeParams, netDtype); - layerMap.put(conf.getLayer().getLayerName(), layers[i]); - } - initCalled = true; - } - - //Set parameters in MultiLayerNetwork.defaultConfiguration for later use in BaseOptimizer.setupSearchState() etc - defaultConfiguration.clearVariables(); - List variables = defaultConfiguration.variables(false); - for (int i = 0; i < layers.length; i++) { - if(layers[i] == null){ - throw new IllegalStateException("Encountered null layer during initialization for layer " + i + - ": " + layerWiseConfigurations.getConf(i).getLayer().getClass().getSimpleName() + " initialization " + - 
"returned null layer?"); - } - - for (String s : layers[i].conf().variables()) { - variables.add(i + "_" + s); - } - } - - // now we init solver & optimizer - if (solver == null) { - try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build(); - solver.initOptimizer(); - } - } - - //Mark that input modification is allowed. - //TODO When is it safe to NOT skip the very first layer? It's not always safe... - // For example dropout + iterating over List that is used for multiple epochs... - for( int i=1; i - *
- * PLEASE NOTE: Do not use this method unless you understand how to use GradientsAccumulator & updates sharing.
- * PLEASE NOTE: Do not use this method on standalone model - * - * @param accumulator Gradient accumulator to use for the network - */ - public void setGradientsAccumulator(GradientsAccumulator accumulator) { - if (!isInitCalled()) - init(); - - if (solver == null) { - try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) - .build(); - } - } - - solver.getOptimizer().setGradientsAccumulator(accumulator); - } - - public boolean isInitCalled() { - return initCalled; - } - - /** - * This method: initializes the flattened gradients array (used in backprop) and sets the appropriate subset in all layers. - * As a general rule, this shouldn't ever need to be called manually when doing training via fit(DataSet) or fit(DataSetIterator) - */ - public void initGradientsView() { - try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - if (layers == null) - init(); - - int nLayers = layers.length; - - //First: Work out total length of params - long paramLength = 0; - val nParamsPerLayer = new long[nLayers]; - for (int i = 0; i < nLayers; i++) { - NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i); - nParamsPerLayer[i] = conf.getLayer().initializer().numParams(conf); - paramLength += nParamsPerLayer[i]; - } - - if(paramLength > 0) { - flattenedGradients = Nd4j.create(flattenedParams.dataType(), new long[]{1, paramLength}, 'f'); //No need to initialize, as each layer will do it each iteration anyway - } - - long paramsSoFar = 0; - for (int i = 0; i < layers.length; i++) { - if (nParamsPerLayer[i] == 0) - continue; //This layer doesn't have any parameters... - INDArray thisLayerGradView = flattenedGradients.get(NDArrayIndex.interval(0,0,true), - NDArrayIndex.interval(paramsSoFar, paramsSoFar + nParamsPerLayer[i])); - layers[i].setBackpropGradientsViewArray(thisLayerGradView); - paramsSoFar += nParamsPerLayer[i]; - } - } - } - - protected INDArray activationFromPrevLayer(int curr, INDArray input, boolean training, LayerWorkspaceMgr mgr) { - if (getLayerWiseConfigurations().getInputPreProcess(curr) != null) { - input = getLayerWiseConfigurations().getInputPreProcess(curr).preProcess(input, getInputMiniBatchSize(), mgr); - } - - INDArray ret = layers[curr].activate(input, training, mgr); - return ret; - } - - /** - * Calculate activation for few layers at once. Suitable for autoencoder partial activation. - * - * In example: in 10-layer deep autoencoder, layers 0 - 4 inclusive are used for encoding part, and layers 5-9 inclusive are used for decoding part. 
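A minimal usage sketch of the partial activation described above, assuming a hypothetical, already-initialized 10-layer autoencoder `net` (a MultiLayerNetwork) and an input matrix `features`; both names are placeholders:

    // Sketch only: 'net' (10-layer autoencoder) and 'features' are assumed to exist
    INDArray encoded = net.activateSelectedLayers(0, 4, features);       // encoder half: layers 0..4 inclusive
    INDArray reconstructed = net.activateSelectedLayers(5, 9, encoded);  // decoder half: layers 5..9 inclusive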
- * - * @param from first layer to be activated, inclusive - * @param to last layer to be activated, inclusive - * @return the activation from the last layer - */ - public INDArray activateSelectedLayers(int from, int to, INDArray input) { - if (input == null) - throw new IllegalStateException("Unable to perform activation; no input found"); - if (from < 0 || from >= layers.length || from >= to) - throw new IllegalStateException("Unable to perform activation; FROM is out of layer space"); - if (to < 1 || to >= layers.length) - throw new IllegalStateException("Unable to perform activation; TO is out of layer space"); - - try { - LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces(helperWorkspaces); //TODO - - INDArray res = input; - for (int l = from; l <= to; l++) { - res = this.activationFromPrevLayer(l, res, false, mgr); - } - return res; - } catch (OutOfMemoryError e){ - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - /** - * Compute all layer activations, from input to output of the output layer. - * Note that the input is included in the list: thus feedForward(in,train).get(0) is the inputs, - * .get(1) is the activations of layer 0, and so on. - * - * @param train Training: if true, perform forward pass/inference at training time. Usually, inference is performed - * with train = false. This impacts whether dropout etc is applied or not. - * @return The list of activations for each layer, including the input - */ - public List feedForward(INDArray input, boolean train) { - setInput(input); - return feedForward(train); - } - - /** - * Compute activations from input to output of the output layer. - * As per {@link #feedForward(INDArray, boolean)} but using the inputs that have previously been set using {@link #setInput(INDArray)} - * - * @return the list of activations for each layer - */ - public List feedForward(boolean train) { - try { - return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layers.length-1, - input, mask, null, true); - } catch (OutOfMemoryError e) { - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - /** - * Perform feed-forward, optionally (not) clearing the layer input arrays.
- * Note: when using clearInputs=false, there can be some performance and memory overhead: this is because the arrays are - * defined outside of workspaces (which are enabled by default) - otherwise, old/invalidated arrays could still be - * accessed after calling this method. Consequently: Don't use clearInputs=false unless you have a use case that - * requires them to remain after feed-forward has been completed - * - * @param train training mode (true) or test mode (false) - * @param clearInputs If false: don't clear the layer inputs - * @return Activations from feed-forward - */ - public List feedForward(boolean train, boolean clearInputs){ - try{ - return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layers.length-1, input, mask, null, clearInputs); - } catch (OutOfMemoryError e) { - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - /** Compute the activations from the input to the specified layer.
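A short sketch of the clearInputs flag discussed above; `net` is assumed to be an already-initialized MultiLayerNetwork and `features` an input batch (placeholder names):

    // Sketch only: 'net' and 'features' are assumed to exist
    net.setInput(features);
    List<INDArray> acts = net.feedForward(false, true);      // train = false, inputs cleared (default, cheapest)
    net.setInput(features);
    List<INDArray> actsKept = net.feedForward(false, false); // clearInputs = false: layer inputs are kept, at some
                                                              // memory cost, since they must live outside workspaces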
- * To compute activations for all layers, use feedForward(...) methods
- * Note: output list includes the original input. So list.get(0) is always the original input, and - * list.get(i+1) is the activations of the ith layer. - * @param layerNum Index of the last layer to calculate activations for. Layers are zero-indexed. - * feedForwardToLayer(i,input) will return the activations for layers 0..i (inclusive) - * @param input Input to the network - * @return list of activations. - */ - public List feedForwardToLayer(int layerNum, INDArray input) { - try{ - return ffToLayerActivationsDetached(false, FwdPassType.STANDARD, false, layerNum, input, mask, null, true); - } catch (OutOfMemoryError e) { - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - /** Compute the activations from the input to the specified layer.
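A small sketch of the indexing convention above; `net` and `features` are placeholder names for an initialized MultiLayerNetwork and an input batch:

    // Sketch only: 'net' and 'features' are assumed to exist
    List<INDArray> acts = net.feedForwardToLayer(1, features);  // activations for layers 0..1 inclusive
    INDArray originalInput = acts.get(0);  // index 0 is always the original input
    INDArray layer0Out = acts.get(1);      // index i + 1 holds the activations of layer i
    INDArray layer1Out = acts.get(2);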
- * To compute activations for all layers, use feedForward(...) methods
- * Note: output list includes the original input. So list.get(0) is always the original input, and - * list.get(i+1) is the activations of the ith layer. - * @param layerNum Index of the last layer to calculate activations for. Layers are zero-indexed. - * feedForwardToLayer(i,input) will return the activations for layers 0..i (inclusive) - * @param input Input to the network - * @param train true for training, false for test (i.e., false if using network after training) - * @return list of activations. - */ - public List feedForwardToLayer(int layerNum, INDArray input, boolean train) { - try { - int layerVertexIdx = layers[layerNum].getIndex(); - return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layerVertexIdx, input, mask, null, true); - } catch (OutOfMemoryError e) { - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - /** Compute the activations from the input to the specified layer, using the currently set input for the network.
- * To compute activations for all layers, use feedForward(...) methods
- * Note: output list includes the original input. So list.get(0) is always the original input, and - * list.get(i+1) is the activations of the ith layer. - * @param layerNum Index of the last layer to calculate activations for. Layers are zero-indexed. - * feedForwardToLayer(i,input) will return the activations for layers 0..i (inclusive) - * @param train true for training, false for test (i.e., false if using network after training) - * @return list of activations. - */ - public List feedForwardToLayer(int layerNum, boolean train) { - try { - return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layerNum, input, mask, null, true); - } catch (OutOfMemoryError e) { - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - - protected void validateArrayWorkspaces(LayerWorkspaceMgr mgr, INDArray array, ArrayType arrayType, int layerIdx, - boolean isPreprocessor, String op){ - try{ - mgr.validateArrayLocation(arrayType, array, false, layerIdx > 0); - } catch (ND4JWorkspaceException e){ - String layerName = layers[layerIdx].conf().getLayer().getLayerName(); - String clazz; - if(isPreprocessor){ - clazz = layerWiseConfigurations.getInputPreProcess(layerIdx).getClass().getName(); - } else { - clazz = layers[layerIdx].getClass().getName(); - } - throw new IllegalStateException(op + ": array (" + arrayType + ") workspace validation failed (" + - (isPreprocessor ? "preprocessor" : "layer ") + layerIdx + (layerName != null ? " - layer name \"" + - layerName + "\"" : "") + " - class: " + clazz + ") - array is defined in incorrect workspace", e); - } - } - - /** - * Feed-forward through the network - returning all array activations in a list, detached from any workspace. - * Note that no workspace should be active externally when calling this method (an exception will be thrown - * if a workspace is open externally) - * - * @param train Training mode (true) or test/inference mode (false) - * @param fwdPassType Type of forward pass to perform (STANDARD or RNN_ACTIVATE_WITH_STORED_STATE only) - * @param storeLastForTBPTT ONLY used if fwdPassType == FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE - * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use numLayers-1 - * @param input Input to the network - * @param fMask Feature mask array. May be null. - * @param lMask Label mask array. May be null. - * @param clearInputs Whether the layer inputs should be cleared - * @return List of activations (including the input), detached from any workspace - */ - protected synchronized List ffToLayerActivationsDetached(boolean train, @NonNull FwdPassType fwdPassType, - boolean storeLastForTBPTT, int layerIndex, @NonNull INDArray input, - INDArray fMask, INDArray lMask, boolean clearInputs){ - setInput(input); - setLayerMaskArrays(fMask, lMask); - - //Verify that no workspace is open externally - WorkspaceUtils.assertNoWorkspacesOpen("Expected no workspace active in ffToLayerActivationsDetached"); - - LayerWorkspaceMgr workspaceMgr; - WorkspaceMode wsm = (train ? 
layerWiseConfigurations.getTrainingWorkspaceMode() : layerWiseConfigurations.getInferenceWorkspaceMode()); - if(wsm == WorkspaceMode.NONE){ - workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); - } else { - workspaceMgr = LayerWorkspaceMgr.builder() - .noWorkspaceFor(ArrayType.ACTIVATIONS) - .with(ArrayType.INPUT, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); - - if(input.isAttached()){ - //Don't leverage out of async DataSetIterator workspaces - workspaceMgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); - } - - if(!clearInputs){ - workspaceMgr.setScopedOutFor(ArrayType.INPUT); - } - } - workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); - - List out = new ArrayList<>(); - out.add(workspaceMgr.leverageTo(ArrayType.INPUT, input)); //Should be unnecessary (and no op), if layer is implemented correctly - - for( int i=0; i<=layerIndex; i++ ){ - try(MemoryWorkspace wsFFWorking = workspaceMgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)){ - if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { - input = getLayerWiseConfigurations().getInputPreProcess(i).preProcess(input, getInputMiniBatchSize(), workspaceMgr); - //Validation: Exception if invalid (bad preprocessor implementation) - validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, true, "Feed forward to layer (inference)"); - } - - if(fwdPassType == FwdPassType.STANDARD){ - input = layers[i].activate(input, train, workspaceMgr); - } else if (fwdPassType == FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE) { - if (layers[i] instanceof RecurrentLayer) { - input = ((RecurrentLayer) layers[i]).rnnActivateUsingStoredState(input, train, - storeLastForTBPTT, workspaceMgr); - } else if(layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer)layers[i]).getUnderlying() instanceof RecurrentLayer) { - RecurrentLayer rl = (RecurrentLayer) ((BaseWrapperLayer)layers[i]).getUnderlying(); - input = rl.rnnActivateUsingStoredState(input, train,storeLastForTBPTT, workspaceMgr); - } else if (layers[i] instanceof MultiLayerNetwork) { - List temp = ((MultiLayerNetwork) layers[i]).rnnActivateUsingStoredState(input, train, storeLastForTBPTT); - input = temp.get(temp.size() - 1); - } else { - input = layers[i].activate(input, train, workspaceMgr); - } - } else { - throw new IllegalStateException("Forward pass type not supported for this method: " + fwdPassType); - } - - //Validation: Exception if invalid (bad layer implementation) - validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, false, "Feed forward to layer (inference)"); - - out.add(input); - } - if(clearInputs) { - layers[i].clear(); - } - } - - return out; - } - - /** - * Feed-forward through the network at training time - returning a list of all activations in a workspace (WS_ALL_LAYERS_ACT) - * if workspaces are enabled for training; or detached if no workspaces are used.
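A configuration sketch for the training/inference workspace switch used above, based on the standard NeuralNetConfiguration builder; the layer sizes are arbitrary placeholder values:

    // Sketch only: layer sizes are placeholders
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .trainingWorkspaceMode(WorkspaceMode.ENABLED)  // training-time forward pass keeps activations in WS_ALL_LAYERS_ACT
        .inferenceWorkspaceMode(WorkspaceMode.NONE)    // inference runs without workspaces; activations are detached
        .list()
        .layer(new DenseLayer.Builder().nIn(4).nOut(8).activation(Activation.RELU).build())
        .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
            .nIn(8).nOut(3).activation(Activation.SOFTMAX).build())
        .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();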
- * Note: if using workspaces for training, this method requires that WS_ALL_LAYERS_ACT is open externally.
- * If using NO workspaces, requires that no external workspace is open
- * Note that this method does NOT clear the inputs to each layer - instead, they are in the WS_ALL_LAYERS_ACT workspace - * for use in later backprop. - * - * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use numLayers-1 - * @param fwdPassType Type of forward pass to perform (STANDARD or RNN_ACTIVATE_WITH_STORED_STATE only) - * @param storeLastForTBPTT ONLY used if fwdPassType == FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE - * @param input Input to network - * @param fMask Feature mask array. May be null - * @param lMask Label mask aray. May be null. - * @return - */ - protected synchronized List ffToLayerActivationsInWs(int layerIndex, @NonNull FwdPassType fwdPassType, boolean storeLastForTBPTT, - @NonNull INDArray input, INDArray fMask, INDArray lMask){ - setInput(input); - setLayerMaskArrays(fMask, lMask); - - LayerWorkspaceMgr workspaceMgr; - if(layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ - WorkspaceUtils.assertNoWorkspacesOpen("Expected no workspace active in ffToLayerActivationsInWs when training workspace is set to NONE"); - workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); - } else { - workspaceMgr = LayerWorkspaceMgr.builder() - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); - - if(input.isAttached()){ - //Don't leverage out of async DataSetIterator workspaces - workspaceMgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); - } - - if(layerWiseConfigurations.getCacheMode() != CacheMode.NONE){ - //For now: store cache mode activations in activations workspace - workspaceMgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); - workspaceMgr.setWorkspace(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG); - } - - WorkspaceUtils.assertOpenAndActive(WS_ALL_LAYERS_ACT, "ffToLayerActivationsInWs method requires workspace WS_ALL_LAYERS_ACT to be open"); - } - workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); - - List out = new ArrayList<>(); - out.add(workspaceMgr.leverageTo(ArrayType.INPUT, input)); //Probably unnecessary usually - - boolean traceLog = log.isTraceEnabled(); - - for( int i = 0; i <=layerIndex; i++) { - try(MemoryWorkspace wsFFWorking = workspaceMgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)){ - if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { - input = getLayerWiseConfigurations().getInputPreProcess(i).preProcess(input, getInputMiniBatchSize(), workspaceMgr); - //Validation: Exception if invalid (bad preprocessor implementation) - validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, true, "Feed forward to layer (training)"); - } - - if(traceLog){ - log.trace("About to forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); - } - - if(fwdPassType == FwdPassType.STANDARD){ - input = layers[i].activate(input, true, workspaceMgr); - } else if(fwdPassType == FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE){ - if (layers[i] instanceof RecurrentLayer) { - input = ((RecurrentLayer) layers[i]).rnnActivateUsingStoredState(input, true, storeLastForTBPTT, workspaceMgr); - }else if(layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer)layers[i]).getUnderlying() instanceof RecurrentLayer) { - RecurrentLayer rl = 
(RecurrentLayer) ((BaseWrapperLayer)layers[i]).getUnderlying(); - input = rl.rnnActivateUsingStoredState(input, true, storeLastForTBPTT, workspaceMgr); - } else if (layers[i] instanceof MultiLayerNetwork) { - List temp = ((MultiLayerNetwork) layers[i]).rnnActivateUsingStoredState(input, true, storeLastForTBPTT); - input = temp.get(temp.size() - 1); - } else { - input = layers[i].activate(input, true, workspaceMgr); - } - } else { - throw new IllegalStateException("FwdPassType not supported for this method: " + fwdPassType); - } - - if(input == null){ - throw new IllegalStateException("Layer " + i + " returned null activations"); - } - - //Validation: Exception if invalid (bad layer implementation) - validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, false, "Feed forward to layer (training)"); - validateArrayWorkspaces(workspaceMgr, layers[i].input(), ArrayType.INPUT, i, false, "Feed forward to layer (training)"); - - out.add(input); - - if(traceLog){ - log.trace("Completed forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); - } - } - } - - return out; - } - - /** - * Provide the output of the specified layer, detached from any workspace. This is most commonly used at inference/test - * time, and is more memory efficient than {@link #ffToLayerActivationsDetached(boolean, FwdPassType, boolean, int, INDArray, INDArray, INDArray, boolean)} - * and {@link #ffToLayerActivationsInWs(int, FwdPassType, boolean, INDArray, INDArray, INDArray)}.
- * This method clears all layer inputs. - * - * NOTE: in general, no workspaces should be activated externally for this method! - * This method handles the workspace activation as required - * - * @param train Training mode (true) or test/inference mode (false) - * @param fwdPassType Type of forward pass to perform (STANDARD, RNN_TIMESTEP or RNN_ACTIVATE_WITH_STORED_STATE) - * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use numLayers-1 - * @param input Input to the network - * @param featureMask Input/feature mask array. May be null. - * @param labelsMask Labels mask array. May be null - * @param outputWorkspace Optional - if provided, outputs should be placed in this workspace. NOTE: this workspace - * must be open - * @return Output of the specified layer, detached from any workspace - */ - protected INDArray outputOfLayerDetached(boolean train, @NonNull FwdPassType fwdPassType, int layerIndex, @NonNull INDArray input, - INDArray featureMask, INDArray labelsMask, MemoryWorkspace outputWorkspace){ - setInput(input); - setLayerMaskArrays(featureMask, labelsMask); + return out; + } + + /** + * Provide the output of the specified layer, detached from any workspace. This is most commonly + * used at inference/test time, and is more memory efficient than + * {@link #ffToLayerActivationsDetached(boolean, FwdPassType, boolean, int, INDArray, INDArray, + * INDArray, boolean)} and + * {@link #ffToLayerActivationsInWs(int, FwdPassType, boolean, INDArray, INDArray, INDArray)}.
+ * This method clears all layer inputs. + *
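At the usage level, the public output(...) overloads typically reach this single-array path, whereas feedForward(...) materializes one activation array per layer; a sketch, with `net` and `features` as placeholder names for an initialized MultiLayerNetwork and an input batch:

    // Sketch only: 'net' and 'features' are assumed to exist
    INDArray prediction = net.output(features);                       // inference: only the final layer's output, detached
    List<INDArray> allActivations = net.feedForward(features, false); // one array per layer, plus the input at index 0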

+ * NOTE: in general, no workspaces should be activated externally for this method! This method + * handles the workspace activation as required + * + * @param train Training mode (true) or test/inference mode (false) + * @param fwdPassType Type of forward pass to perform (STANDARD, RNN_TIMESTEP or + * RNN_ACTIVATE_WITH_STORED_STATE) + * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use + * numLayers-1 + * @param input Input to the network + * @param featureMask Input/feature mask array. May be null. + * @param labelsMask Labels mask array. May be null + * @param outputWorkspace Optional - if provided, outputs should be placed in this workspace. + * NOTE: this workspace must be open + * @return Output of the specified layer, detached from any workspace + */ + protected INDArray outputOfLayerDetached(boolean train, @NonNull FwdPassType fwdPassType, + int layerIndex, @NonNull INDArray input, + INDArray featureMask, INDArray labelsMask, MemoryWorkspace outputWorkspace) { + setInput(input); + setLayerMaskArrays(featureMask, labelsMask); /* Idea here: we want to minimize memory, and return only the final array @@ -1203,672 +1341,731 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, org.d Additionally, we'll reconfigure the workspace manager for the *final* layer, so that we don't have to detach */ - if(outputWorkspace == null || outputWorkspace instanceof DummyWorkspace) { - WorkspaceUtils.assertNoWorkspacesOpen("Expected no workspace active in outputOfLayerDetached", true); - } else { - Preconditions.checkState(outputWorkspace.isScopeActive(), "Workspace \"" + outputWorkspace.getId() + - "\" was provided for the network/layer outputs. When provided, this workspace must be opened before " + - "calling the output method; furthermore, closing the workspace is the responsibility of the user"); + if (outputWorkspace == null || outputWorkspace instanceof DummyWorkspace) { + WorkspaceUtils.assertNoWorkspacesOpen("Expected no workspace active in outputOfLayerDetached", + true); + } else { + Preconditions.checkState(outputWorkspace.isScopeActive(), + "Workspace \"" + outputWorkspace.getId() + + "\" was provided for the network/layer outputs. When provided, this workspace must be opened before " + + + "calling the output method; furthermore, closing the workspace is the responsibility of the user"); + } + + LayerWorkspaceMgr mgrEven; + LayerWorkspaceMgr mgrOdd; + + WorkspaceMode wsm = train ? layerWiseConfigurations.getTrainingWorkspaceMode() + : layerWiseConfigurations.getInferenceWorkspaceMode(); + if (wsm == WorkspaceMode.NONE) { + mgrEven = LayerWorkspaceMgr.noWorkspaces(); + mgrOdd = mgrEven; + + //Check for external workspace - doesn't make sense to have one with workspace mode NONE + if (outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)) { + throw new IllegalStateException("Workspace \"" + outputWorkspace.getId() + + "\" was provided for the network/layer outputs, however " + (train ? "training" + : "inference") + + " workspace mode is set to NONE. Cannot put output activations into the specified workspace if" + + + "workspaces are disabled for the network. 
use getConfiguration().setTraining/InferenceWorkspaceMode(WorkspaceMode.ENABLED)"); + } + } else { + mgrEven = LayerWorkspaceMgr.builder() + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_LAYER_ACT_1, WS_LAYER_ACT_X_CONFIG) + .with(ArrayType.INPUT, WS_LAYER_ACT_2, + WS_LAYER_ACT_X_CONFIG) //Inputs should always be in the previous WS + .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); + + mgrOdd = LayerWorkspaceMgr.builder() + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG) + .with(ArrayType.INPUT, WS_LAYER_ACT_1, + WS_LAYER_ACT_X_CONFIG) //Inputs should always be in the previous WS + .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); + } + mgrEven.setHelperWorkspacePointers(helperWorkspaces); + mgrOdd.setHelperWorkspacePointers(helperWorkspaces); + + MemoryWorkspace wsActCloseNext = null; + MemoryWorkspace temp = null; + MemoryWorkspace initialWorkspace = Nd4j.getMemoryManager().getCurrentWorkspace(); + + boolean traceLog = log.isTraceEnabled(); + + Throwable t = null; + try { + for (int i = 0; i <= layerIndex; i++) { + LayerWorkspaceMgr mgr = (i % 2 == 0 ? mgrEven : mgrOdd); + + if (traceLog) { + log.trace("About to forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); } - LayerWorkspaceMgr mgrEven; - LayerWorkspaceMgr mgrOdd; + //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet) + //Hence: put inputs in working memory + if (i == 0 && wsm != WorkspaceMode.NONE) { + mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG); + } - WorkspaceMode wsm = train ? layerWiseConfigurations.getTrainingWorkspaceMode() : layerWiseConfigurations.getInferenceWorkspaceMode(); - if(wsm == WorkspaceMode.NONE){ - mgrEven = LayerWorkspaceMgr.noWorkspaces(); - mgrOdd = mgrEven; + try (MemoryWorkspace wsFFWorking = mgr.notifyScopeEntered( + ArrayType.FF_WORKING_MEM)) { //Working memory: opened/closed once per layer + //Activations workspaces: opened/closed every second layer. + //So mgrEven (WS_LAYER_ACT_1) open at start of 0, 2, 4, 8; closed at end of 1, 3, 5, 7 etc + //and mgrOdd (WS_LAYER_ACT_2) opened at start of 1, 3, 5, 7; closed at end of 2, 4, 6, 8 etc + temp = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS); - //Check for external workspace - doesn't make sense to have one with workspace mode NONE - if(outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)){ - throw new IllegalStateException("Workspace \"" + outputWorkspace.getId() + - "\" was provided for the network/layer outputs, however " + (train ? "training" : "inference") + - " workspace mode is set to NONE. Cannot put output activations into the specified workspace if" + - "workspaces are disabled for the network. use getConfiguration().setTraining/InferenceWorkspaceMode(WorkspaceMode.ENABLED)"); + //Note that because we're opening activation workspaces not in a simple nested order, we'll manually + // override the previous workspace setting. 
Otherwise, when we close these workspaces, the "current" + // workspace may be set to the incorrect one + temp.setPreviousWorkspace(initialWorkspace); + + if (i == 0 && input.isAttached()) { + //Don't leverage out of async DataSetIterator workspaces + mgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); + } + + if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { + input = getLayerWiseConfigurations().getInputPreProcess(i) + .preProcess(input, getInputMiniBatchSize(), mgr); + //Validation: Exception if invalid (bad preprocessor implementation) + validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, true, + "Output of layer (inference)"); + } + + if (i == layerIndex) { + if (outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)) { + //Place activations in user-specified workspace + mgr.setWorkspace(ArrayType.ACTIVATIONS, outputWorkspace.getId(), + outputWorkspace.getWorkspaceConfiguration()); + } else { + //Final activations: should be detached + mgr.setScopedOutFor(ArrayType.ACTIVATIONS); } - } else { - mgrEven = LayerWorkspaceMgr.builder() - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_LAYER_ACT_1, WS_LAYER_ACT_X_CONFIG) - .with(ArrayType.INPUT, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG) //Inputs should always be in the previous WS - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + } - mgrOdd = LayerWorkspaceMgr.builder() - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG) - .with(ArrayType.INPUT, WS_LAYER_ACT_1, WS_LAYER_ACT_X_CONFIG) //Inputs should always be in the previous WS - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + if (fwdPassType == FwdPassType.STANDARD) { + //Standard feed-forward case + if (i > 0 && ConvolutionUtils.layerHasConvolutionLayout(layers[i - 1].conf().getLayer()) + && ConvolutionUtils.layerHasConvolutionLayout(layers[i].conf().getLayer())) { + + CNN2DFormat preLayerFormat = ConvolutionUtils.getFormatForLayer( + layers[i - 1].conf().getLayer()); + CNN2DFormat currLayerFormat = ConvolutionUtils.getFormatForLayer( + layers[i].conf().getLayer()); + if (preLayerFormat != currLayerFormat) { + //NHWC case + if (preLayerFormat == CNN2DFormat.NCHW) { + input = input.permute(0, 3, 1, 2); + } + //NCHW case + else if (preLayerFormat == CNN2DFormat.NHWC) { + input = input.permute(0, 2, 3, 1); + + } else { + throw new IllegalStateException( + "No CNN2DDataFormat type found for previous layer!"); + } + } + + input = layers[i].activate(input, train, mgr); + } else if (i > 0 && Convolution1DUtils.hasRnnDataFormat(layers[i - 1].conf().getLayer()) + && Convolution1DUtils.hasRnnDataFormat(layers[i].conf().getLayer())) { + RNNFormat preLayerFormat = Convolution1DUtils.getRnnFormatFromLayer( + layers[i - 1].conf().getLayer()); + RNNFormat currLayerFormat = Convolution1DUtils.getRnnFormatFromLayer( + layers[i].conf().getLayer()); + //permute for next layer + if (preLayerFormat != currLayerFormat) { + input = input.permute(0, 2, 1); + } + + input = layers[i].activate(input, train, mgr); + + + } else { + input = layers[i].activate(input, train, mgr); + } + } else if (fwdPassType == FwdPassType.RNN_TIMESTEP) { + //rnnTimeStep case + if (layers[i] instanceof RecurrentLayer) { + input = ((RecurrentLayer) 
layers[i]).rnnTimeStep(reshapeTimeStepInput(input), mgr); + } else if (layers[i] instanceof BaseWrapperLayer + && ((BaseWrapperLayer) layers[i]).getUnderlying() instanceof RecurrentLayer) { + RecurrentLayer rl = ((RecurrentLayer) ((BaseWrapperLayer) layers[i]).getUnderlying()); + input = rl.rnnTimeStep(reshapeTimeStepInput(input), mgr); + } else if (layers[i] instanceof MultiLayerNetwork) { + input = ((MultiLayerNetwork) layers[i]).rnnTimeStep(reshapeTimeStepInput(input)); + } else { + input = layers[i].activate(input, false, mgr); + } + } else { + throw new IllegalArgumentException( + "Unsupported forward pass type for this method: " + fwdPassType); + } + layers[i].clear(); + //Validation: Exception if invalid (bad layer implementation) + validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, false, + "Output of layer (inference)"); + + if (wsActCloseNext != null) { + wsActCloseNext.close(); + } + wsActCloseNext = temp; + temp = null; } - mgrEven.setHelperWorkspacePointers(helperWorkspaces); - mgrOdd.setHelperWorkspacePointers(helperWorkspaces); - MemoryWorkspace wsActCloseNext = null; - MemoryWorkspace temp = null; - MemoryWorkspace initialWorkspace = Nd4j.getMemoryManager().getCurrentWorkspace(); + if (traceLog) { + log.trace("Completed forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); + } - boolean traceLog = log.isTraceEnabled(); - - Throwable t = null; + //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet) + //Hence: put inputs in working memory -> set back to default for next use of workspace mgr + if (i == 0 && wsm != WorkspaceMode.NONE) { + mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_ACT_2, + WS_LAYER_ACT_X_CONFIG); //Inputs should always be in the previous WS + } + } + } catch (Throwable t2) { + t = t2; + } finally { + if (wsActCloseNext != null) { try { - for (int i = 0; i <= layerIndex; i++) { - LayerWorkspaceMgr mgr = (i % 2 == 0 ? mgrEven : mgrOdd); - - if (traceLog) { - log.trace("About to forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); - } - - //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet) - //Hence: put inputs in working memory - if (i == 0 && wsm != WorkspaceMode.NONE) { - mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG); - } - - try (MemoryWorkspace wsFFWorking = mgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { //Working memory: opened/closed once per layer - //Activations workspaces: opened/closed every second layer. - //So mgrEven (WS_LAYER_ACT_1) open at start of 0, 2, 4, 8; closed at end of 1, 3, 5, 7 etc - //and mgrOdd (WS_LAYER_ACT_2) opened at start of 1, 3, 5, 7; closed at end of 2, 4, 6, 8 etc - temp = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS); - - //Note that because we're opening activation workspaces not in a simple nested order, we'll manually - // override the previous workspace setting. 
Otherwise, when we close these workspaces, the "current" - // workspace may be set to the incorrect one - temp.setPreviousWorkspace(initialWorkspace); - - - if (i == 0 && input.isAttached()) { - //Don't leverage out of async DataSetIterator workspaces - mgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); - } - - if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { - input = getLayerWiseConfigurations().getInputPreProcess(i).preProcess(input, getInputMiniBatchSize(), mgr); - //Validation: Exception if invalid (bad preprocessor implementation) - validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, true, "Output of layer (inference)"); - } - - if (i == layerIndex) { - if (outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)) { - //Place activations in user-specified workspace - mgr.setWorkspace(ArrayType.ACTIVATIONS, outputWorkspace.getId(), outputWorkspace.getWorkspaceConfiguration()); - } else { - //Final activations: should be detached - mgr.setScopedOutFor(ArrayType.ACTIVATIONS); - } - } - - if (fwdPassType == FwdPassType.STANDARD) { - //Standard feed-forward case - if(i > 0 && ConvolutionUtils.layerHasConvolutionLayout(layers[i - 1].conf().getLayer()) - && ConvolutionUtils.layerHasConvolutionLayout(layers[i].conf().getLayer())) { - - CNN2DFormat preLayerFormat = ConvolutionUtils.getFormatForLayer(layers[i - 1].conf().getLayer()); - CNN2DFormat currLayerFormat = ConvolutionUtils.getFormatForLayer(layers[i].conf().getLayer()); - if(preLayerFormat != currLayerFormat) { - //NHWC case - if(preLayerFormat == CNN2DFormat.NCHW) { - input = input.permute(0,3,1,2); - } - //NCHW case - else if(preLayerFormat == CNN2DFormat.NHWC) { - input = input.permute(0,2,3,1); - - } - else - throw new IllegalStateException("No CNN2DDataFormat type found for previous layer!"); - } - - input = layers[i].activate(input, train, mgr); - } else if(i > 0 && Convolution1DUtils.hasRnnDataFormat(layers[i - 1].conf().getLayer()) - && Convolution1DUtils.hasRnnDataFormat(layers[i].conf().getLayer())) { - RNNFormat preLayerFormat = Convolution1DUtils.getRnnFormatFromLayer(layers[i - 1].conf().getLayer()); - RNNFormat currLayerFormat = Convolution1DUtils.getRnnFormatFromLayer(layers[i].conf().getLayer()); - //permute for next layer - if(preLayerFormat != currLayerFormat) { - input = input.permute(0,2,1); - } - - input = layers[i].activate(input, train, mgr); - - - } else - input = layers[i].activate(input, train, mgr); - } else if (fwdPassType == FwdPassType.RNN_TIMESTEP) { - //rnnTimeStep case - if (layers[i] instanceof RecurrentLayer) { - input = ((RecurrentLayer) layers[i]).rnnTimeStep(reshapeTimeStepInput(input), mgr); - } else if (layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer) layers[i]).getUnderlying() instanceof RecurrentLayer) { - RecurrentLayer rl = ((RecurrentLayer) ((BaseWrapperLayer) layers[i]).getUnderlying()); - input = rl.rnnTimeStep(reshapeTimeStepInput(input), mgr); - } else if (layers[i] instanceof MultiLayerNetwork) { - input = ((MultiLayerNetwork) layers[i]).rnnTimeStep(reshapeTimeStepInput(input)); - } else { - input = layers[i].activate(input, false, mgr); - } - } else { - throw new IllegalArgumentException("Unsupported forward pass type for this method: " + fwdPassType); - } - layers[i].clear(); - //Validation: Exception if invalid (bad layer implementation) - validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, false, "Output of layer (inference)"); - - if (wsActCloseNext != null) { - wsActCloseNext.close(); - } - 
wsActCloseNext = temp; - temp = null; - } - - if (traceLog) { - log.trace("Completed forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); - } - - //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet) - //Hence: put inputs in working memory -> set back to default for next use of workspace mgr - if (i == 0 && wsm != WorkspaceMode.NONE) { - mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG); //Inputs should always be in the previous WS - } - } - } catch (Throwable t2){ - t = t2; - } finally { - if(wsActCloseNext != null){ - try { - wsActCloseNext.close(); - } catch (Throwable t2){ - if(t != null){ - log.error("Encountered second exception while trying to close workspace after initial exception"); - log.error("Original exception:", t); - throw t2; - } - } - } - if(temp != null){ - //Should only be non-null on exception - while(temp.isScopeActive()){ - //For safety, should never occur in theory: a single close() call may not be sufficient, if - // workspace scope was borrowed and not properly closed when exception occurred - try{ - temp.close(); - } catch (Throwable t2){ - if(t != null){ - log.error("Encountered second exception while trying to close workspace after initial exception"); - log.error("Original exception:", t); - throw t2; - } - } - } + wsActCloseNext.close(); + } catch (Throwable t2) { + if (t != null) { + log.error( + "Encountered second exception while trying to close workspace after initial exception"); + log.error("Original exception:", t); + throw t2; + } + } + } + if (temp != null) { + //Should only be non-null on exception + while (temp.isScopeActive()) { + //For safety, should never occur in theory: a single close() call may not be sufficient, if + // workspace scope was borrowed and not properly closed when exception occurred + try { + temp.close(); + } catch (Throwable t2) { + if (t != null) { + log.error( + "Encountered second exception while trying to close workspace after initial exception"); + log.error("Original exception:", t); + throw t2; } + } + } + } - Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace); + Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace); - if(t != null){ - if(t instanceof RuntimeException){ - throw ((RuntimeException)t); - } - throw new RuntimeException("Error during neural network forward pass", t); - } + if (t != null) { + if (t instanceof RuntimeException) { + throw ((RuntimeException) t); + } + throw new RuntimeException("Error during neural network forward pass", t); + } - if(outputWorkspace == null || outputWorkspace instanceof DummyWorkspace) { - WorkspaceUtils.assertNoWorkspacesOpen("Expected no workspace active at the end of outputOfLayerDetached", true); - } else { - Preconditions.checkState(outputWorkspace.isScopeActive(), "Expected output workspace to still be open" + - "at end of outputOfLayerDetached, but it is closed. This suggests an implementation or layer workspace problem"); - } + if (outputWorkspace == null || outputWorkspace instanceof DummyWorkspace) { + WorkspaceUtils.assertNoWorkspacesOpen( + "Expected no workspace active at the end of outputOfLayerDetached", true); + } else { + Preconditions.checkState(outputWorkspace.isScopeActive(), + "Expected output workspace to still be open" + + "at end of outputOfLayerDetached, but it is closed. 
This suggests an implementation or layer workspace problem"); + } + } + + return input; + } + + private INDArray reshapeTimeStepInput(INDArray input) { + if (input.rank() == 2) { // dynamically reshape to 3D input with one time-step. + long[] inShape = input.shape(); + input = input.reshape(inShape[0], inShape[1], 1); + } + return input; + } + + /** + * Compute activations of all layers from input (inclusive) to output of the final/output layer. + * Equivalent to calling {@link #feedForward(boolean)} with train=false + * + * @return the list of activations for each layer, including the input + */ + public List feedForward() { + return feedForward(false); + } + + /** + * Compute activations of all layers from input (inclusive) to output of the final/output layer. + * Equivalent to calling {@link #feedForward(INDArray, boolean)} with train = false + * + * @return the list of activations for each layer, including the input + */ + public List feedForward(INDArray input) { + if (input == null) { + throw new IllegalStateException("Unable to perform feed forward; no input found"); + } + setInput(input); + return feedForward(); + } + + /** + * Compute the activations from the input to the output layer, given mask arrays (that may be + * null). The masking arrays are used in situations such as one-to-many and many-to-one recurrent + * neural network (RNN) designs, as well as for supporting time series of varying lengths within + * the same minibatch for RNNs. Other than mask arrays, this is equivalent to calling + * {@link #feedForward(INDArray, boolean)} with train = false + */ + public List feedForward(INDArray input, INDArray featuresMask, INDArray labelsMask) { + setLayerMaskArrays(featuresMask, labelsMask); + List list = feedForward(input); + clearLayerMaskArrays(); + return list; + } + + @Override + public Gradient gradient() { + return gradient; + } + + @Override + public Pair gradientAndScore() { + return new Pair<>(gradient(), score()); + } + + /** + * Clone the MultiLayerNetwork + * + * @return A cloned MultiLayerNetwork with a copy of the configuration, parameters and updater + * identical to the current network. + */ + @Override + public MultiLayerNetwork clone() { + if (!initCalled) { + init(); + } + MultiLayerConfiguration conf = this.layerWiseConfigurations.clone(); + MultiLayerNetwork ret = new MultiLayerNetwork(conf); + ret.init(this.params().dup(), false); + + if (solver != null) { + //If solver is null: updater hasn't been initialized -> getUpdater call will force initialization, however + Updater u = this.getUpdater(); + INDArray updaterState = u.getStateViewArray(); + if (updaterState != null) { + ret.getUpdater().setStateViewArray(ret, updaterState.dup(), false); + } + } + + if (hasAFrozenLayer()) { + //correct layers to frozen layers + Layer[] clonedLayers = ret.getLayers(); + for (int i = 0; i < layers.length; i++) { + if (layers[i] instanceof FrozenLayer) { + clonedLayers[i] = new FrozenLayer(ret.getLayer(i)); + } + } + ret.setLayers(clonedLayers); + } + return ret; + } + + protected boolean hasAFrozenLayer() { + for (int i = 0; i < layers.length - 1; i++) { + if (layers[i] instanceof FrozenLayer) { + return true; + } + } + return false; + } + + /** + * @deprecated To be removed. Use {@link #params()} instead + */ + @Deprecated + public INDArray params(boolean backwardOnly) { + return params(); + } + + /** + * Returns a 1 x m vector where the vector is composed of a flattened vector of all of the + * parameters in the network.
See {@link #getParam(String)} and {@link #paramTable()} for a + * more useful/interpretable representation of the parameters.
Note that the parameter vector + is not a copy, and changes to the returned INDArray will impact the network parameters. + * + * @return the parameters for this neural net + */ + @Override + public INDArray params() { + return flattenedParams; + } + + /** + * Set the parameters for this model. This expects a linear ndarray which is then unpacked + * internally relative to the expected ordering of the model.
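A minimal sketch of the setParams contract described here, assuming both networks were built from the same configuration so the linear parameter ordering matches; the helper name is illustrative only.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;

class ParamTransferSketch {
  // Copies the flattened parameters of 'source' into 'target'.
  static void copyParameters(MultiLayerNetwork source, MultiLayerNetwork target) {
    INDArray params = source.params().dup(); // detach from the source network first
    if (params.length() != target.numParams()) {
      throw new IllegalArgumentException(
          "Configurations do not match: " + params.length() + " vs " + target.numParams());
    }
    target.setParams(params);
  }
}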
See also: + * {@link #setParamTable(Map)} and {@link #setParam(String, INDArray)} + * + * @param params the parameters for the model + */ + @Override + public void setParams(INDArray params) { + if (flattenedParams == params) { + return; //No op + } + + if (flattenedParams != null && params.length() == flattenedParams.length()) { + if (params != flattenedParams) { + flattenedParams.assign(params); + } + } else { + if (flattenedParams == null) { + flattenedParams = params.dup(); + } + int idx = 0; + for (int i = 0; i < getLayers().length; i++) { + Layer layer = getLayer(i); + long range = layer.numParams(); + if (range <= 0) { + continue; //Some layers: no parameters (subsampling, etc) + } + INDArray get = params.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(idx, range + idx)); + layer.setParams(get); + idx += range; + } + } + } + + @Override + public void setParamsViewArray(INDArray params) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + @Override + public INDArray getGradientsViewArray() { + return flattenedGradients; + } + + @Override + public void setBackpropGradientsViewArray(INDArray gradients) { + int paramsSoFar = 0; + for (Layer layer : layers) { + if (layer.numParams() == 0) { + continue; + } + layer.setBackpropGradientsViewArray(gradients.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(paramsSoFar, paramsSoFar + layer.numParams()))); + paramsSoFar += layer.numParams(); + } + } + + @Override + public TrainingConfig getConfig() { + throw new UnsupportedOperationException("Not supported"); + } + + /** + * Returns the number of parameters in the network + * + * @return The number of parameters + */ + @Override + public long numParams() { + if (!isInitCalled()) { + init(); + } + return flattenedParams == null ? 0 : flattenedParams.length(); //May be null for a network with 0 parameters + } + + /** + * Returns the number of parameters in the network + * + * @param backwards If true: exclude any parameters used only in unsupervised layerwise training + * (such as the decoder parameters in an autoencoder) + * @return The number of parameters + */ + @Override + public long numParams(boolean backwards) { + int length = 0; + for (int i = 0; i < layers.length; i++) { + length += layers[i].numParams(backwards); + } + + return length; + } + + /** + * Sets the input and labels and returns the F1 score for the prediction with respect to the true + * labels + * + * @param data the data to score + * @return the score for the given input,label pairs + */ + @Override + public double f1Score(org.nd4j.linalg.dataset.api.DataSet data) { + return f1Score(data.getFeatures(), data.getLabels()); + } + + /** + * Perform minibatch training on all minibatches in the DataSetIterator, for the specified number + * of epochs. Equivalent to calling {@link #fit(DataSetIterator)} numEpochs times in a loop + * + * @param iterator Training data (DataSetIterator). Iterator must support resetting + * @param numEpochs Number of training epochs, >= 1 + */ + public void fit(@NonNull DataSetIterator iterator, int numEpochs) { + Preconditions.checkArgument(numEpochs > 0, "Number of epochs must be > 0. 
Got numEpochs = %s", + numEpochs); + Preconditions.checkArgument(numEpochs == 1 || iterator.resetSupported(), + "Cannot perform multiple epochs training using an" + + " iterator that does not support resetting (iterator.resetSupported() returned false)"); + + for (int i = 0; i < numEpochs; i++) { + fit(iterator); + } + } + + /** + * Perform minibatch training on all minibatches in the DataSetIterator for 1 epoch.
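For illustration only, the multi-epoch overload implemented above can be driven as follows; the iterator is assumed to come from elsewhere and must support resetting whenever more than one epoch is requested.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;

class MultiEpochFitSketch {
  // Hypothetical helper: trains for several epochs, guarding the reset requirement up front.
  static void trainForEpochs(MultiLayerNetwork net, DataSetIterator trainData, int numEpochs) {
    if (numEpochs > 1 && !trainData.resetSupported()) {
      throw new IllegalArgumentException("Iterator must support reset() for multi-epoch training");
    }
    net.fit(trainData, numEpochs); // equivalent to calling fit(trainData) numEpochs times
  }
}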
Note that + * this method does not do layerwise pretraining.
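Since fit(DataSetIterator) performs no layerwise pretraining, here is a hedged sketch of running unsupervised pretraining separately before supervised fitting; pretrain(DataSetIterator) is assumed to behave as in stock Deeplearning4j and the iterator is assumed to be resettable.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;

class PretrainThenFitSketch {
  static void pretrainThenFit(MultiLayerNetwork net, DataSetIterator data) {
    net.pretrain(data); // unsupervised layerwise pretraining (only affects layers that support it)
    data.reset();
    net.fit(data);      // one supervised epoch, as documented here
  }
}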
For pretraining use method pretrain.. + * {@link #pretrain(DataSetIterator)}
+ * + * @param iterator Training data (DataSetIterator) + */ + @Override + public void fit(DataSetIterator iterator) { + try { + fitHelper(iterator); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; + } + } + + private synchronized void fitHelper(DataSetIterator iterator) { + // we're wrapping all iterators into AsyncDataSetIterator to provide background prefetch - where appropriate + DataSetIterator iter; + boolean destructable = false; + if (iterator.asyncSupported()) { + iter = new AsyncDataSetIterator(iterator, + Math.min(Nd4j.getAffinityManager().getNumberOfDevices() * 2, 2), true); + destructable = true; + } else { + iter = iterator; + } + + for (TrainingListener tl : trainingListeners) { + tl.onEpochStart(this); + } + + LayerWorkspaceMgr workspaceMgr; + if (getLayerWiseConfigurations().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); + } else { + workspaceMgr = LayerWorkspaceMgr.builder() + .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + //Note for updater working memory, we have the option to re-use WS_ALL_LAYERS_ACT or FF/BP_WORKING_MEM + // as these should be closed by the time updaters are executed + //Generally, WS_ALL_LAYERS_ACT will be the larger of the two, so we'll use this + .with(ArrayType.UPDATER_WORKING_MEM, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .build(); + } + workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); + + update(TaskUtils.buildTask(iter)); + if (!iter.hasNext() && iter.resetSupported()) { + iter.reset(); + } + long time1 = System.currentTimeMillis(); + while (iter.hasNext()) { + + DataSet next = iter.next(); + long time2 = System.currentTimeMillis(); + + lastEtlTime.set((time2 - time1)); + + if (next.getFeatures() == null || next.getLabels() == null) { + break; + } + + // TODO: basically we want to wrap internals of this loop into workspace + + boolean hasMaskArrays = next.hasMaskArrays(); + + if (layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT) { + doTruncatedBPTT(next.getFeatures(), next.getLabels(), next.getFeaturesMaskArray(), + next.getLabelsMaskArray(), workspaceMgr); + } else { + if (hasMaskArrays) { + setLayerMaskArrays(next.getFeaturesMaskArray(), next.getLabelsMaskArray()); } - return input; - } + setInput(next.getFeatures()); + setLabels(next.getLabels()); - private INDArray reshapeTimeStepInput(INDArray input) { - if (input.rank() == 2) { // dynamically reshape to 3D input with one time-step. - long[] inShape = input.shape(); - input = input.reshape(inShape[0], inShape[1], 1); + if (solver == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + .build(); + } } - return input; - } - /** - * Compute activations of all layers from input (inclusive) to output of the final/output layer. 
- * Equivalent to calling {@link #feedForward(boolean)} with train=false - * - * @return the list of activations for each layer, including the input - */ - public List feedForward() { - return feedForward(false); - } + //TODO CACHE + solver.optimize(workspaceMgr); + } - /** - * Compute activations of all layers from input (inclusive) to output of the final/output layer. - * Equivalent to calling {@link #feedForward(INDArray, boolean)} with train = false - * - * @return the list of activations for each layer, including the input - */ - public List feedForward(INDArray input) { - if (input == null) - throw new IllegalStateException("Unable to perform feed forward; no input found"); - setInput(input); - return feedForward(); - } - - /** - * Compute the activations from the input to the output layer, given mask arrays (that may be null) - * The masking arrays are used in situations such an one-to-many and many-to-one rucerrent neural network (RNN) - * designs, as well as for supporting time series of varying lengths within the same minibatch for RNNs. - * Other than mask arrays, this is equivalent to calling {@link #feedForward(INDArray, boolean)} with train = false - */ - public List feedForward(INDArray input, INDArray featuresMask, INDArray labelsMask) { - setLayerMaskArrays(featuresMask, labelsMask); - List list = feedForward(input); + if (hasMaskArrays) { clearLayerMaskArrays(); - return list; + } + + time1 = System.currentTimeMillis(); + synchronizeIterEpochCounts(); } - - @Override - public Gradient gradient() { - return gradient; + if (!trainingListeners.isEmpty()) { + for (TrainingListener tl : trainingListeners) { + tl.onEpochEnd(this); + } } - @Override - public Pair gradientAndScore() { - return new Pair<>(gradient(), score()); + clearLayersStates(); + + if (destructable) { + ((AsyncDataSetIterator) iter).shutdown(); } + incrementEpochCount(); + } - /** - * Clone the MultiLayerNetwork - * @return A cloned MultiLayerNetwork with a copy of the configuration, parameters and updater identical to the current network. 
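A small sketch of the clone semantics documented here: the clone receives duplicated parameters (and updater state), so later in-place changes to the original do not leak into the copy. Assumes the source network has been initialised; names are illustrative.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;

class CloneSketch {
  static MultiLayerNetwork snapshot(MultiLayerNetwork net) {
    MultiLayerNetwork copy = net.clone(); // configuration, parameters and updater are duplicated
    INDArray original = net.params();
    INDArray cloned = copy.params();
    assert original.equals(cloned);       // identical values immediately after cloning
    assert original != cloned;            // but backed by different arrays
    return copy;
  }
}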
- */ - @Override - public MultiLayerNetwork clone() { - if(!initCalled) - init(); - MultiLayerConfiguration conf = this.layerWiseConfigurations.clone(); - MultiLayerNetwork ret = new MultiLayerNetwork(conf); - ret.init(this.params().dup(), false); - - if (solver != null) { - //If solver is null: updater hasn't been initialized -> getUpdater call will force initialization, however - Updater u = this.getUpdater(); - INDArray updaterState = u.getStateViewArray(); - if (updaterState != null) { - ret.getUpdater().setStateViewArray(ret, updaterState.dup(), false); - } - } - - if (hasAFrozenLayer()) { - //correct layers to frozen layers - Layer[] clonedLayers = ret.getLayers(); - for (int i = 0; i < layers.length; i++) { - if (layers[i] instanceof FrozenLayer) { - clonedLayers[i] = new FrozenLayer(ret.getLayer(i)); - } - } - ret.setLayers(clonedLayers); - } - return ret; + /** + * Calculate parameter gradients and input activation gradients given the input and labels, and + * optionally mask arrays + * + * @param features Features for gradient calculation + * @param label Labels for gradient + * @param fMask Features mask array (may be null) + * @param labelMask Label mask array (may be null) + * @return A pair of gradient arrays: parameter gradients (in Gradient object) and input + * activation gradients + */ + public Pair calculateGradients(@NonNull INDArray features, + @NonNull INDArray label, + INDArray fMask, INDArray labelMask) { + try { + return calculateGradientsHelper(features, label, fMask, labelMask); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; } + } - protected boolean hasAFrozenLayer() { - for (int i = 0; i < layers.length - 1; i++) { - if (layers[i] instanceof FrozenLayer) - return true; - } - return false; + private Pair calculateGradientsHelper(INDArray features, INDArray label, + INDArray fMask, + INDArray labelMask) { + setInput(features); + setLabels(label); + setLayerMaskArrays(fMask, labelMask); + + LayerWorkspaceMgr mgr; + if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + mgr = LayerWorkspaceMgr.noWorkspaces(); + } else { + mgr = LayerWorkspaceMgr.builder() + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); + + if (layerWiseConfigurations.getCacheMode() != null) { + //For now: store cache mode activations in activations workspace + mgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); + } } + mgr.setHelperWorkspacePointers(helperWorkspaces); - - /** - * @deprecated To be removed. Use {@link #params()} instead - */ - @Deprecated - public INDArray params(boolean backwardOnly) { - return params(); - } - - - /** - * Returns a 1 x m vector where the vector is composed of a flattened vector of all of the parameters in the network.
- * See {@link #getParam(String)} and {@link #paramTable()} for a more useful/interpretable representation of the parameters.
- * Note that the parameter vector is not a copy, and changes to the returned INDArray will impact the network parameters. - * - * @return the parameters for this neural net - */ - @Override - public INDArray params() { - return flattenedParams; - } - - /** - * Set the parameters for this model. - * This expects a linear ndarray which then be unpacked internally relative to the expected ordering of the model.
- * See also: {@link #setParamTable(Map)} and {@link #setParam(String, INDArray)} - * - * @param params the parameters for the model - */ - @Override - public void setParams(INDArray params) { - if (flattenedParams == params) { - return; //No op - } - - if (flattenedParams != null && params.length() == flattenedParams.length()) { - if (params != flattenedParams) { - flattenedParams.assign(params); - } - } else { - if (flattenedParams == null) - flattenedParams = params.dup(); - int idx = 0; - for (int i = 0; i < getLayers().length; i++) { - Layer layer = getLayer(i); - long range = layer.numParams(); - if (range <= 0) - continue; //Some layers: no parameters (subsampling, etc) - INDArray get = params.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(idx, range + idx)); - layer.setParams(get); - idx += range; - } - } - } - - @Override - public void setParamsViewArray(INDArray params) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - @Override - public INDArray getGradientsViewArray() { - return flattenedGradients; - } - - @Override - public void setBackpropGradientsViewArray(INDArray gradients) { - int paramsSoFar = 0; - for (Layer layer : layers) { - if (layer.numParams() == 0) - continue; - layer.setBackpropGradientsViewArray(gradients.get(NDArrayIndex.interval(0,0,true), - NDArrayIndex.interval(paramsSoFar, paramsSoFar + layer.numParams()))); - paramsSoFar += layer.numParams(); - } - } - - @Override - public TrainingConfig getConfig() { - throw new UnsupportedOperationException("Not supported"); - } - - /** - * Returns the number of parameters in the network - * - * @return The number of parameters - */ - @Override - public long numParams() { - if(!isInitCalled()) - init(); - return flattenedParams == null ? 0 : flattenedParams.length(); //Maybe nul for 0 params net - } - - /** - * Returns the number of parameters in the network - * - * @param backwards If true: exclude any parameters uned only in unsupervised layerwise training (such as the decoder - * parameters in an autoencoder) - * @return The number of parameters - */ - @Override - public long numParams(boolean backwards) { - int length = 0; - for (int i = 0; i < layers.length; i++) - length += layers[i].numParams(backwards); - - return length; - } - - /** - * Sets the input and labels and returns the F1 score for the prediction with respect to the true labels - * - * @param data the data to score - * @return the score for the given input,label pairs - */ - @Override - public double f1Score(org.nd4j.linalg.dataset.api.DataSet data) { - return f1Score(data.getFeatures(), data.getLabels()); - } - - /** - * Perform minibatch training on all minibatches in the DataSetIterator, for the specified number of epochs. - * Equvalent to calling {@link #fit(DataSetIterator)} numEpochs times in a loop - * - * @param iterator Training data (DataSetIterator). Iterator must support resetting - * @param numEpochs Number of training epochs, >= 1 - */ - public void fit(@NonNull DataSetIterator iterator, int numEpochs){ - Preconditions.checkArgument(numEpochs > 0, "Number of epochs much be > 0. Got numEpochs = %s", numEpochs); - Preconditions.checkArgument(numEpochs == 1 || iterator.resetSupported(), "Cannot perform multiple epochs training using" + - "iterator thas does not support resetting (iterator.resetSupported() returned false)"); - - for(int i=0; i - * Note that this method does not do layerwise pretraining.
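As an illustrative alternative to the numEpochs overload (not part of this change set): because this method trains for exactly one epoch, multi-epoch training can also be written as an explicit loop, which is convenient when per-epoch work such as logging or checkpointing is needed. ScoreIterationListener and getEpochCount() are assumed to be available as in stock Deeplearning4j.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;

class EpochLoopSketch {
  static void train(MultiLayerNetwork net, DataSetIterator trainData, int numEpochs) {
    net.setListeners(new ScoreIterationListener(100)); // log the score every 100 iterations
    for (int epoch = 0; epoch < numEpochs; epoch++) {
      net.fit(trainData);  // one full pass over the iterator
      trainData.reset();   // make the data available again for the next epoch
      System.out.println("Finished epoch " + net.getEpochCount());
    }
  }
}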
- * For pretraining use method pretrain.. {@link #pretrain(DataSetIterator)}
- * @param iterator Training data (DataSetIterator) - */ - @Override - public void fit(DataSetIterator iterator) { - try{ - fitHelper(iterator); - } catch (OutOfMemoryError e){ - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - private synchronized void fitHelper(DataSetIterator iterator){ - // we're wrapping all iterators into AsyncDataSetIterator to provide background prefetch - where appropriate - DataSetIterator iter; - boolean destructable = false; - if (iterator.asyncSupported()) { - iter = new AsyncDataSetIterator(iterator, Math.min(Nd4j.getAffinityManager().getNumberOfDevices() * 2, 2), true); - destructable = true; - } else { - iter = iterator; - } - + //Calculate activations (which are stored in each layer, and used in backprop) + try (MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS)) { + //First: do a feed-forward through the network + //Note that we don't actually need to do the full forward pass through the output layer right now; but we do + // need the input to the output layer to be set (such that backprop can be done) + List activations = ffToLayerActivationsInWs(layers.length - 2, FwdPassType.STANDARD, + false, input, mask, fMask); + if (!trainingListeners.isEmpty()) { + //TODO: We possibly do want output layer activations in some cases here... for (TrainingListener tl : trainingListeners) { - tl.onEpochStart(this); + tl.onForwardPass(this, activations); } + } + INDArray inputToOutputLayer = activations.get(activations.size() - 1); + if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null) { + inputToOutputLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1) + .preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr); + //Validate activations location + } + getOutputLayer().setInput(inputToOutputLayer, mgr); - LayerWorkspaceMgr workspaceMgr; - if(getLayerWiseConfigurations().getTrainingWorkspaceMode() == WorkspaceMode.NONE){ - workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); - } else { - workspaceMgr = LayerWorkspaceMgr.builder() - .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - //Note for updater working memory, we have the option to re-use WS_ALL_LAYERS_ACT or FF/BP_WORKING_MEM - // as these should be closed by the time updaters are executed - //Generally, WS_ALL_LAYERS_ACT will be the larger of the two, so we'll use this - .with(ArrayType.UPDATER_WORKING_MEM, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .build(); - } - workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); - - update(TaskUtils.buildTask(iter)); - if (!iter.hasNext() && iter.resetSupported()) { - iter.reset(); - } - long time1 = System.currentTimeMillis(); - while (iter.hasNext()) { - - DataSet next = iter.next(); - long time2 = System.currentTimeMillis(); - - lastEtlTime.set((time2 - time1)); - - if (next.getFeatures() == null || next.getLabels() == null) - break; - - // TODO: basically we want to wrap internals of this loop into workspace - - - boolean hasMaskArrays = next.hasMaskArrays(); - - if (layerWiseConfigurations.getBackpropType() == 
BackpropType.TruncatedBPTT) { - doTruncatedBPTT(next.getFeatures(), next.getLabels(), next.getFeaturesMaskArray(), - next.getLabelsMaskArray(), workspaceMgr); - } else { - if (hasMaskArrays) - setLayerMaskArrays(next.getFeaturesMaskArray(), next.getLabelsMaskArray()); - - setInput(next.getFeatures()); - setLabels(next.getLabels()); - - if (solver == null) { - try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) - .build(); - } - } - - //TODO CACHE - solver.optimize(workspaceMgr); - } - - if (hasMaskArrays) - clearLayerMaskArrays(); - - time1 = System.currentTimeMillis(); - synchronizeIterEpochCounts(); - } - - if (!trainingListeners.isEmpty()) { - for (TrainingListener tl : trainingListeners) { - tl.onEpochEnd(this); - } - } - - clearLayersStates(); - - if (destructable) - ((AsyncDataSetIterator) iter).shutdown(); - - incrementEpochCount(); + Pair p = calcBackpropGradients(null, true, false, true); + if (p.getSecond() != null) { + p.setSecond(p.getSecond().detach()); + } + return p; } + } - /** - * Calculate parameter gradients and input activation gradients given the input and labels, and optionally mask arrays - * - * @param features Features for gradient calculation - * @param label Labels for gradient - * @param fMask Features mask array (may be null) - * @param labelMask Label mask array (may be null) - * @return A pair of gradient arrays: parameter gradients (in Gradient object) and input activation gradients - */ - public Pair calculateGradients(@NonNull INDArray features, @NonNull INDArray label, - INDArray fMask, INDArray labelMask) { - try{ - return calculateGradientsHelper(features, label, fMask, labelMask); - } catch (OutOfMemoryError e){ - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } + /** + * Calculate gradients and errors. Used in two places: (a) backprop (for standard multi layer + * network learning) (b) backpropGradient (layer method, for when MultiLayerNetwork is used as a + * layer) + * + * @param epsilon Errors (technically errors .* activations). Not used if + * withOutputLayer = true + * @param withOutputLayer if true: assume last layer is output layer, and calculate errors + * based on labels. In this case, the epsilon input is not used + * (may/should be null). If false: calculate backprop gradients + * @param returnInputActGrad If true: terun the input activation gradients (detached). 
False: + * don't return + * @return Gradients and the error (epsilon) at the input + */ + protected Pair calcBackpropGradients(INDArray epsilon, + boolean withOutputLayer, boolean tbptt, + boolean returnInputActGrad) { + if (flattenedGradients == null) { + initGradientsView(); } + String multiGradientKey; + Gradient gradient = new DefaultGradient(flattenedGradients); - private Pair calculateGradientsHelper(INDArray features, INDArray label, INDArray fMask, - INDArray labelMask){ - setInput(features); - setLabels(label); - setLayerMaskArrays(fMask, labelMask); + LayerWorkspaceMgr mgrEven; + LayerWorkspaceMgr mgrOdd; - LayerWorkspaceMgr mgr; - if(layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ - mgr = LayerWorkspaceMgr.noWorkspaces(); - } else { - mgr = LayerWorkspaceMgr.builder() - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); - - if(layerWiseConfigurations.getCacheMode() != null){ - //For now: store cache mode activations in activations workspace - mgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); - } - } - mgr.setHelperWorkspacePointers(helperWorkspaces); - - //Calculate activations (which are stored in each layer, and used in backprop) - try(MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS)) { - //First: do a feed-forward through the network - //Note that we don't actually need to do the full forward pass through the output layer right now; but we do - // need the input to the output layer to be set (such that backprop can be done) - List activations = ffToLayerActivationsInWs(layers.length - 2, FwdPassType.STANDARD, false, input, mask, fMask); - if (!trainingListeners.isEmpty()) { - //TODO: We possibly do want output layer activations in some cases here... - for (TrainingListener tl : trainingListeners) { - tl.onForwardPass(this, activations); - } - } - INDArray inputToOutputLayer = activations.get(activations.size() - 1); - if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null) { - inputToOutputLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1) - .preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr); - //Validate activations location - } - getOutputLayer().setInput(inputToOutputLayer, mgr); - - Pair p = calcBackpropGradients(null, true, false, true); - if(p.getSecond() != null){ - p.setSecond( p.getSecond().detach()); - } - return p; - } - } - - /** Calculate gradients and errors. Used in two places: - * (a) backprop (for standard multi layer network learning) - * (b) backpropGradient (layer method, for when MultiLayerNetwork is used as a layer) - * @param epsilon Errors (technically errors .* activations). Not used if withOutputLayer = true - * @param withOutputLayer if true: assume last layer is output layer, and calculate errors based on labels. In this - * case, the epsilon input is not used (may/should be null). - * If false: calculate backprop gradients - * @param returnInputActGrad If true: terun the input activation gradients (detached). 
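A hedged usage sketch for the calculateGradients method added in this change: it returns the parameter gradients together with the (detached) gradient with respect to the input, which is useful for input-sensitivity checks. The Pair import reflects current ND4J packaging and may differ in this repository.

import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.common.primitives.Pair;
import org.nd4j.linalg.api.ndarray.INDArray;

class InputGradientSketch {
  // Gradient of the loss w.r.t. the input features for one labelled batch (no masks).
  static INDArray inputGradient(MultiLayerNetwork net, INDArray features, INDArray labels) {
    Pair<Gradient, INDArray> p = net.calculateGradients(features, labels, null, null);
    Gradient paramGradients = p.getFirst();  // per-parameter gradients, keyed per layer and variable
    INDArray inputGradients = p.getSecond(); // same shape as 'features', already detached
    return inputGradients;
  }
}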
False: don't return - * @return Gradients and the error (epsilon) at the input - */ - protected Pair calcBackpropGradients(INDArray epsilon, boolean withOutputLayer, boolean tbptt, - boolean returnInputActGrad) { - if (flattenedGradients == null) { - initGradientsView(); - } - String multiGradientKey; - Gradient gradient = new DefaultGradient(flattenedGradients); - - LayerWorkspaceMgr mgrEven; - LayerWorkspaceMgr mgrOdd; - - if(layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ - mgrEven = LayerWorkspaceMgr.noWorkspaces(); - mgrOdd = mgrEven; - WorkspaceUtils.assertNoWorkspacesOpen("Expected no workspace active in calcBackpropGradients when " + - "training workspace is set to none"); - } else { + if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + mgrEven = LayerWorkspaceMgr.noWorkspaces(); + mgrOdd = mgrEven; + WorkspaceUtils.assertNoWorkspacesOpen( + "Expected no workspace active in calcBackpropGradients when " + + "training workspace is set to none"); + } else { /* Workspaces for backprop in MLN share some features with outputOfLayerDetached, in terms of the "two alternating workspaces" idea (but for activation gradients here, instead of activations there). @@ -1884,1422 +2081,1546 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, org.d */ - mgrEven = LayerWorkspaceMgr.builder() - //Activations in context of backprop (preOut methods etc) are not used outside of the layer itself - .with(ArrayType.ACTIVATIONS, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) //Usually not required here. Exception: OutputLayer dropout - .with(ArrayType.ACTIVATION_GRAD, WS_LAYER_ACT_1, WS_LAYER_ACT_X_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + mgrEven = LayerWorkspaceMgr.builder() + //Activations in context of backprop (preOut methods etc) are not used outside of the layer itself + .with(ArrayType.ACTIVATIONS, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, + WS_ALL_LAYERS_ACT_CONFIG) //Usually not required here. Exception: OutputLayer dropout + .with(ArrayType.ACTIVATION_GRAD, WS_LAYER_ACT_1, WS_LAYER_ACT_X_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); - mgrOdd = LayerWorkspaceMgr.builder() - //Activations in context of backprop (preOut methods etc) are not used outside of the layer itself - .with(ArrayType.ACTIVATIONS, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) //Usually not required here. 
Exception: OutputLayer dropout - .with(ArrayType.ACTIVATION_GRAD, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + mgrOdd = LayerWorkspaceMgr.builder() + //Activations in context of backprop (preOut methods etc) are not used outside of the layer itself + .with(ArrayType.ACTIVATIONS, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, + WS_ALL_LAYERS_ACT_CONFIG) //Usually not required here. Exception: OutputLayer dropout + .with(ArrayType.ACTIVATION_GRAD, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); - if(epsilon == null) { - //If epsilon is non-null: external errors use case -> inputs are already detached - WorkspaceUtils.assertOpenActiveAndCurrent(WS_ALL_LAYERS_ACT, "calcBackpropGradients method requires workspace WS_ALL_LAYERS_ACT" + - " to be open when workspaces are used"); - } - } - mgrEven.setHelperWorkspacePointers(helperWorkspaces); - mgrOdd.setHelperWorkspacePointers(helperWorkspaces); - - //calculate and apply the backward gradient for every layer - /* - * Skip the output layer for the indexing and just loop backwards updating the coefficients for each layer. - * (when withOutputLayer == true) - * - * Activate applies the activation function for each layer and sets that as the input for the following layer. - * - * Typical literature contains most trivial case for the error calculation: wT * weights - * This interpretation transpose a few things to get mini batch because ND4J is rows vs columns organization for params - */ - int numLayers = getnLayers(); - //Store gradients is a list; used to ensure iteration order in DefaultGradient linked hash map. i.e., layer 0 first instead of output layer - LinkedList> gradientList = new LinkedList<>(); - - - Pair currPair = null; - MemoryWorkspace wsActGradCloseNext = null; - MemoryWorkspace wsActGradTemp = null; - MemoryWorkspace initialWorkspace = Nd4j.getMemoryManager().getCurrentWorkspace(); - - boolean traceLog = log.isTraceEnabled(); - - Throwable t = null; - try { - for (int i = layers.length - 1; i >= 0; i--) { - if (layers[i] instanceof FrozenLayer) { - break; - } - - if (traceLog) { - log.trace("About to backprop: {} - {}", i, layers[i].getClass().getSimpleName()); - } - - LayerWorkspaceMgr workspaceMgr = (i % 2 == 0 ? mgrEven : mgrOdd); - - if (withOutputLayer && i == layers.length - 1) { - if (!(getOutputLayer() instanceof IOutputLayer)) { - log.warn("Warning: final layer isn't output layer. 
You cannot use backprop without an output layer."); - return null; - } - - IOutputLayer outputLayer = (IOutputLayer) getOutputLayer(); - if (labels == null && outputLayer.needsLabels()) - throw new IllegalStateException("No labels found"); - outputLayer.setLabels(labels); - } - - //Open activation gradients WS *then* BP working memory, so BP working memory is opened last for use in layers - wsActGradTemp = workspaceMgr.notifyScopeEntered(ArrayType.ACTIVATION_GRAD); - try (MemoryWorkspace wsBPWorking = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)) { - - //Note that because we're opening activation workspaces not in a simple nested order, we'll manually - // override the previous workspace setting. Otherwise, when we close these workspaces, the "current" - // workspace may be set to the incorrect one - wsActGradTemp.setPreviousWorkspace(initialWorkspace); - wsBPWorking.setPreviousWorkspace(initialWorkspace); - - INDArray eps = (i == layers.length - 1 ? epsilon : currPair.getRight()); //eps is null for OutputLayer - - if (!tbptt) { - //Standard case - currPair = layers[i].backpropGradient(eps, workspaceMgr); - } else { - //TBPTT gradient - if (layers[i] instanceof RecurrentLayer) { - currPair = ((RecurrentLayer) layers[i]).tbpttBackpropGradient(currPair.getSecond(), - layerWiseConfigurations.getTbpttBackLength(), workspaceMgr); - } else { - currPair = layers[i].backpropGradient(currPair.getSecond(), workspaceMgr); - } - } - - if (currPair.getSecond() != null) { - //Edge case: may be null for Embedding layer, for example - validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, i, - false, "Backprop"); - } - - for (Map.Entry entry : currPair.getFirst().gradientForVariable().entrySet()) { - String origName = entry.getKey(); - multiGradientKey = i + "_" + origName; - gradientList.addLast(new Triple<>(multiGradientKey, entry.getValue(), - currPair.getFirst().flatteningOrderForVariable(origName))); - } - if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { - currPair = new Pair<>(currPair.getFirst(), - this.layerWiseConfigurations.getInputPreProcess(i) - .backprop(currPair.getSecond(), getInputMiniBatchSize(), workspaceMgr)); - if (i > 0 && currPair.getSecond() != null) { - validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, i, - true, "Backprop"); - } - } - - if (i == 0) { - if (returnInputActGrad && currPair.getSecond() != null) { - currPair.setSecond(currPair.getSecond().detach()); - } else { - currPair.setSecond(null); - } - } - - if (wsActGradCloseNext != null) { - wsActGradCloseNext.close(); - } - wsActGradCloseNext = wsActGradTemp; - wsActGradTemp = null; - } - - if (traceLog) { - log.trace("Completed backprop: {} - {}", i, layers[i].getClass().getSimpleName()); - } - } - } catch (Throwable thr ){ - t = thr; - } finally { - if(wsActGradCloseNext != null){ - try { - wsActGradCloseNext.close(); - } catch (Throwable t2){ - if(t != null){ - log.error("Encountered second exception while trying to close workspace after initial exception"); - log.error("Original exception:", t); - throw t2; - } - } - } - if(wsActGradTemp != null) { - //Should only be non-null on exception - try { - wsActGradTemp.close(); - } catch (Throwable t2) { - if (t != null) { - log.error("Encountered second exception while trying to close workspace after initial exception"); - log.error("Original exception:", t); - throw t2; - } - } - } - Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace); - - if(t != null){ - if(t 
instanceof RuntimeException){ - throw ((RuntimeException)t); - } - throw new RuntimeException("Error during neural network forward pass", t); - } - } - - if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { - WorkspaceUtils.assertNoWorkspacesOpen("Expected no workspace active in calcBackpropGradients when " + - "training workspace is set to none"); - } else { - if(epsilon == null) { - //If epsilon != null: external errors use case (inputs are detached instead) - WorkspaceUtils.assertOpenActiveAndCurrent(WS_ALL_LAYERS_ACT, "calcBackpropGradients: WS_ALL_LAYERS_ACT is no" + - " longer the currently open/active workspace"); - } - } - - //Add gradients to Gradients (map), in correct order - for (Triple triple : gradientList) { - gradient.setGradientFor(triple.getFirst(), triple.getSecond(), triple.getThird()); - } - - return new Pair<>(gradient, currPair.getSecond()); + if (epsilon == null) { + //If epsilon is non-null: external errors use case -> inputs are already detached + WorkspaceUtils.assertOpenActiveAndCurrent(WS_ALL_LAYERS_ACT, + "calcBackpropGradients method requires workspace WS_ALL_LAYERS_ACT" + + " to be open when workspaces are used"); + } } + mgrEven.setHelperWorkspacePointers(helperWorkspaces); + mgrOdd.setHelperWorkspacePointers(helperWorkspaces); - protected void doTruncatedBPTT(INDArray input, INDArray labels, INDArray featuresMaskArray, - INDArray labelsMaskArray, LayerWorkspaceMgr workspaceMgr) { - if (input.rank() != 3 || labels.rank() != 3) { - log.warn("Cannot do truncated BPTT with non-3d inputs or labels. Expect input with shape [miniBatchSize,nIn,timeSeriesLength], got " - + Arrays.toString(input.shape()) + "\tand labels with shape " - + Arrays.toString(labels.shape())); - return; - } - if (input.size(2) != labels.size(2)) { - log.warn("Input and label time series have different lengths: {} input length, {} label length", - input.size(2), labels.size(2)); - return; - } - - int fwdLen = layerWiseConfigurations.getTbpttFwdLength(); - update(TaskUtils.buildTask(input, labels)); - val timeSeriesLength = input.size(2); - long nSubsets = timeSeriesLength / fwdLen; - if (timeSeriesLength % fwdLen != 0) - nSubsets++; //Example: 100 fwdLen with timeSeriesLength=120 -> want 2 subsets (1 of size 100, 1 of size 20) - - rnnClearPreviousState(); - - for (int i = 0; i < nSubsets; i++) { - long startTimeIdx = (long) i * fwdLen; - long endTimeIdx = startTimeIdx + fwdLen; - if (endTimeIdx > timeSeriesLength) - endTimeIdx = timeSeriesLength; - - if (startTimeIdx > Integer.MAX_VALUE || endTimeIdx > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - INDArray[] subsets = getSubsetsForTbptt((int) startTimeIdx, (int) endTimeIdx, input, labels, - featuresMaskArray, labelsMaskArray); - - setInput(subsets[0]); - setLabels(subsets[1]); - setLayerMaskArrays(subsets[2], subsets[3]); - - if (solver == null) { - try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) - .build(); - } - } - solver.optimize(workspaceMgr); - - //Finally, update the state of the RNN layers: - updateRnnStateWithTBPTTState(); - } - - rnnClearPreviousState(); - clearLayerMaskArrays(); - } - - private INDArray[] getSubsetsForTbptt(int startTimeIdx, int endTimeIdx, INDArray input, INDArray labels, - INDArray fMask, INDArray lMask ){ - INDArray[] out = new INDArray[4]; - out[0] = input.get(NDArrayIndex.all(), NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); - 
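For clarity, a standalone sketch of the time-window slicing performed for truncated BPTT here: a [miniBatchSize, nIn, timeSeriesLength] array is cut into forward-length windows along dimension 2. Shapes and the fwdLen value are illustrative only.

import java.util.Arrays;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;

class TbpttSliceSketch {
  public static void main(String[] args) {
    int fwdLen = 20;
    INDArray input = Nd4j.rand(new int[]{4, 8, 50}); // [miniBatchSize, nIn, timeSeriesLength]
    long timeSeriesLength = input.size(2);
    long nSubsets = timeSeriesLength / fwdLen;
    if (timeSeriesLength % fwdLen != 0) {
      nSubsets++; // the last window is shorter than fwdLen
    }
    for (long i = 0; i < nSubsets; i++) {
      long start = i * fwdLen;
      long end = Math.min(start + fwdLen, timeSeriesLength);
      INDArray window = input.get(NDArrayIndex.all(), NDArrayIndex.all(),
          NDArrayIndex.interval(start, end));
      System.out.println("window " + i + " shape: " + Arrays.toString(window.shape()));
    }
  }
}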
out[1] = labels.get(NDArrayIndex.all(), NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); - - if (fMask != null) { - out[2] = fMask.get(NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); - } - if (lMask != null) { - out[3] = lMask.get(NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); - } - - return out; - } - - /** - * Intended for internal/developer use + //calculate and apply the backward gradient for every layer + /* + * Skip the output layer for the indexing and just loop backwards updating the coefficients for each layer. + * (when withOutputLayer == true) + * + * Activate applies the activation function for each layer and sets that as the input for the following layer. + * + * Typical literature contains most trivial case for the error calculation: wT * weights + * This interpretation transpose a few things to get mini batch because ND4J is rows vs columns organization for params */ - public void updateRnnStateWithTBPTTState() { - for (int i = 0; i < layers.length; i++) { + int numLayers = getnLayers(); + //Store gradients is a list; used to ensure iteration order in DefaultGradient linked hash map. i.e., layer 0 first instead of output layer + LinkedList> gradientList = new LinkedList<>(); + + Pair currPair = null; + MemoryWorkspace wsActGradCloseNext = null; + MemoryWorkspace wsActGradTemp = null; + MemoryWorkspace initialWorkspace = Nd4j.getMemoryManager().getCurrentWorkspace(); + + boolean traceLog = log.isTraceEnabled(); + + Throwable t = null; + try { + for (int i = layers.length - 1; i >= 0; i--) { + if (layers[i] instanceof FrozenLayer) { + break; + } + + if (traceLog) { + log.trace("About to backprop: {} - {}", i, layers[i].getClass().getSimpleName()); + } + + LayerWorkspaceMgr workspaceMgr = (i % 2 == 0 ? mgrEven : mgrOdd); + + if (withOutputLayer && i == layers.length - 1) { + if (!(getOutputLayer() instanceof IOutputLayer)) { + log.warn( + "Warning: final layer isn't output layer. You cannot use backprop without an output layer."); + return null; + } + + IOutputLayer outputLayer = (IOutputLayer) getOutputLayer(); + if (labels == null && outputLayer.needsLabels()) { + throw new IllegalStateException("No labels found"); + } + outputLayer.setLabels(labels); + } + + //Open activation gradients WS *then* BP working memory, so BP working memory is opened last for use in layers + wsActGradTemp = workspaceMgr.notifyScopeEntered(ArrayType.ACTIVATION_GRAD); + try (MemoryWorkspace wsBPWorking = workspaceMgr.notifyScopeEntered( + ArrayType.BP_WORKING_MEM)) { + + //Note that because we're opening activation workspaces not in a simple nested order, we'll manually + // override the previous workspace setting. Otherwise, when we close these workspaces, the "current" + // workspace may be set to the incorrect one + wsActGradTemp.setPreviousWorkspace(initialWorkspace); + wsBPWorking.setPreviousWorkspace(initialWorkspace); + + INDArray eps = (i == layers.length - 1 ? 
epsilon + : currPair.getRight()); //eps is null for OutputLayer + + if (!tbptt) { + //Standard case + currPair = layers[i].backpropGradient(eps, workspaceMgr); + } else { + //TBPTT gradient if (layers[i] instanceof RecurrentLayer) { - RecurrentLayer l = ((RecurrentLayer) layers[i]); - l.rnnSetPreviousState(l.rnnGetTBPTTState()); - } else if (layers[i] instanceof MultiLayerNetwork) { - ((MultiLayerNetwork) layers[i]).updateRnnStateWithTBPTTState(); - } - } - } - - /** - * Get the {@link TrainingListener}s set for this network, if any - * @return listeners set for this network - */ - public Collection getListeners() { - return trainingListeners; - } - - /** - * @deprecated Use {@link #getListeners()} - */ - @Deprecated - public Collection getTrainingListeners() { - return trainingListeners; - } - - @Override - public void setListeners(Collection listeners) { - if (layers == null) { - init(); - } - for (Layer layer : layers) { - layer.setListeners(listeners); - } - - if (solver != null) { - solver.setListeners(listeners); - } - - this.trainingListeners.clear(); - if (listeners != null) { - this.trainingListeners.addAll(listeners); - } - } - - /** - * This method ADDS additional TrainingListener to existing listeners - * - * @param listeners - */ - @Override - public void addListeners(TrainingListener... listeners) { - Collections.addAll(trainingListeners, listeners); - - // fixme this is wrong, since it removes existing listeners from the solver - if (solver != null) { - solver.setListeners(this.trainingListeners); - } - } - - @Override - public void setListeners(TrainingListener... listeners) { - Collection cListeners = new ArrayList<>(); - //Check: user might have done setListeners(null) thinking this would clear the current listeners. - //This results in an TrainingListener[1] with a single null value -> results in a NPE later - if (listeners != null && listeners.length > 0) { - for (TrainingListener i : listeners) { - if (i != null) - cListeners.add(i); - } - } - setListeners(cListeners); - } - - /** - * Usable only for classification networks in conjunction with OutputLayer. Cannot be used with RnnOutputLayer, - * CnnLossLayer, or networks used for regression.
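An illustrative sketch of the relationship described in this predict documentation: for a classification network with a standard OutputLayer, predict is simply the per-example argmax of the class probabilities returned by output. Names are hypothetical.

import java.util.Arrays;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;

class PredictSketch {
  static int[] predictedClasses(MultiLayerNetwork net, INDArray features) {
    INDArray probabilities = net.output(features, false); // [numExamples, numClasses]
    int[] viaOutput = probabilities.argMax(1).toIntVector();
    int[] viaPredict = net.predict(features); // same result in one call
    assert Arrays.equals(viaOutput, viaPredict);
    return viaPredict;
  }
}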
- * To get the raw output activations of the output layer, use {@link #output(INDArray)} or similar.
- *
- * Equivalent to argmax(this.output(input)): Returns the predicted class indices corresponding to the predictions - * for each example in the features array. - * - * @param d The input features to perform inference on - * @return The predicted class index for each example - */ - @Override - public int[] predict(INDArray d) { - INDArray output = output(d, Layer.TrainingMode.TEST); - - if (d.size(0) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - - Preconditions.checkState(output.rank() == 2, "predict(INDArray) method can only be used on rank 2 output - got array with rank %s", output.rank()); - return output.argMax(1).toIntVector(); - } - - /** - * As per {@link #predict(INDArray)} but the returned values are looked up from the list of label names - * in the provided DataSet - */ - @Override - public List predict(org.nd4j.linalg.dataset.api.DataSet dataSet) { - Preconditions.checkState(dataSet.getLabelNamesList() != null, "This method can only be used when the DataSet contains a label name list"); - int[] intRet = predict(dataSet.getFeatures()); - List ret = new ArrayList<>(); - for (int i = 0; i < intRet.length; i++) { - ret.add(i, dataSet.getLabelName(intRet[i])); - } - return ret; - } - - /** - * Fit the model for one iteration on the provided data - * - * @param data the examples to classify (one example in each row) - * @param labels the example labels(a binary outcome matrix) - */ - @Override - public void fit(INDArray data, INDArray labels) { - fit(data, labels, null, null); - } - - /** - * Fit the model for one iteration on the provided data - * - * @param features the examples to classify (one example in each row) - * @param labels the example labels(a binary outcome matrix) - * @param featuresMask The mask array for the features (used for variable length time series, etc). May be null. - * @param labelsMask The mask array for the labels (used for variable length time series, etc). May be null. 
- */ - public synchronized void fit(INDArray features, INDArray labels, INDArray featuresMask, INDArray labelsMask) { - try{ - fitHelper(features, labels, featuresMask, labelsMask); - } catch (OutOfMemoryError e){ - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - private void fitHelper(INDArray features, INDArray labels, INDArray featuresMask, INDArray labelsMask){ - if(numParams() == 0) { - //No op: can't fit a network with 0 parameters - return; - } - - setInput(features); - setLabels(labels); - this.setLayerMaskArrays(featuresMask, labelsMask); - update(TaskUtils.buildTask(features, labels)); - - LayerWorkspaceMgr workspaceMgr; - if(layerWiseConfigurations.getTrainingWorkspaceMode() == null){ - workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); - } else { - workspaceMgr = LayerWorkspaceMgr.builder() - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - //Note for updater working memory, we have the option to re-use WS_ALL_LAYERS_ACT or FF/BP_WORKING_MEM - // these should be closed by the time updaters are executed - //Generally, WS_ALL_LAYERS_ACT will be the larger of the two, so we'll use this - .with(ArrayType.UPDATER_WORKING_MEM, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .build(); - } - workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); - - if (layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT) { - doTruncatedBPTT(features, labels, featuresMask, labelsMask, workspaceMgr); - } else { - if (solver == null) { - try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build(); - } - } - //TODO CACHE WORKSPACE, IF USED??? - solver.optimize(workspaceMgr); - } - - clearLayerMaskArrays(); - clearLayersStates(); - synchronizeIterEpochCounts(); - } - - @Override - public void fit(INDArray data, LayerWorkspaceMgr workspaceMgr){ - throw new UnsupportedOperationException("Not supported: use pretrainLayer"); - } - - - /** - * Fit the model for one iteration on the provided data - * - * @param data the data to train on - */ - @Override - public void fit(org.nd4j.linalg.dataset.api.DataSet data) { - fit(data.getFeatures(), data.getLabels(), data.getFeaturesMaskArray(), data.getLabelsMaskArray()); - } - - /** - * Fit the model for one iteration on the provided data - * - * @param examples the examples to classify (one example in each row) - * @param labels the labels for each example (the number of labels must match - */ - @Override - public void fit(INDArray examples, int[] labels) { - org.deeplearning4j.nn.conf.layers.OutputLayer layerConf = - (org.deeplearning4j.nn.conf.layers.OutputLayer) getOutputLayer().conf().getLayer(); - - if (layerConf.getNOut() > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - fit(examples, FeatureUtil.toOutcomeMatrix(labels, (int) layerConf.getNOut())); - } - - - /** - * Perform inference on the provided input/features - i.e., perform forward pass using the provided input/features - * and return the output of the final layer. - * - * @param input Input to the network - * @param train whether the output is test or train. This mainly affect hyper parameters such as dropout and - * batch normalization, which have different behaviour for test vs. 
train - * @return The network predictions - i.e., the activations of the final layer - */ - public INDArray output(INDArray input, TrainingMode train) { - return output(input, train == TrainingMode.TRAIN); - } - - /** - * Perform inference on the provided input/features - i.e., perform forward pass using the provided input/features - * and return the output of the final layer. - * - * @param input Input to the network - * @param train whether the output is test or train. This mainly affect hyper parameters such as dropout and - * batch normalization, which have different behaviour for test vs. train - * @return The network predictions - i.e., the activations of the final layer - */ - public INDArray output(INDArray input, boolean train) { - return output(input, train, null, null); - } - - /** - * Calculate the output of the network, with masking arrays. The masking arrays are used in situations such - * as one-to-many and many-to-one recurrent neural network (RNN) designs, as well as for supporting time series - * of varying lengths within the same minibatch. - */ - public INDArray output(INDArray input, boolean train, INDArray featuresMask, INDArray labelsMask) { - return output(input, train, featuresMask, labelsMask, null); - } - - /** - * Get the network output, which is optionally placed in the specified memory workspace.
- * If no memory workspace is provided, the output will be detached (not in any workspace).
- * If a memory workspace is provided, the output activation array (i.e., the INDArray returned by this method) - * will be placed in the specified workspace. This workspace must be opened by the user before calling this method - - * and the user is responsible for (a) closing this workspace, and (b) ensuring the output array is not used out - * of scope (i.e., not used after closing the workspace to which it belongs - as this is likely to cause either - * an exception when used, or a crash). - * - * @param input Input to the network - * @param train True for train, false otherwise - * @param outputWorkspace May be null. If not null: the workspace MUST be opened before calling this method. - * @return The output/activations from the network (either detached or in the specified workspace if provided) - */ - public INDArray output(INDArray input, boolean train, MemoryWorkspace outputWorkspace) { - return output(input, train, null, null, outputWorkspace); - } - - /** - * Get the network output, which is optionally placed in the specified memory workspace.
- * If no memory workspace is provided, the output will be detached (not in any workspace).
- * If a memory workspace is provided, the output activation array (i.e., the INDArray returned by this method) - * will be placed in the specified workspace. This workspace must be opened by the user before calling this method - - * and the user is responsible for (a) closing this workspace, and (b) ensuring the output array is not used out - * of scope (i.e., not used after closing the workspace to which it belongs - as this is likely to cause either - * an exception when used, or a crash). - * - * @param input Input to the network - * @param train True for train, false otherwise - * @param outputWorkspace May be null. If not null: the workspace MUST be opened before calling this method. - * @return The output/activations from the network (either detached or in the specified workspace if provided) - */ - public synchronized INDArray output(INDArray input, boolean train, INDArray featuresMask, INDArray labelsMask, MemoryWorkspace outputWorkspace) { - try { - return outputOfLayerDetached(train, FwdPassType.STANDARD, layers.length - 1, input, featuresMask, labelsMask, outputWorkspace); - } catch (OutOfMemoryError e) { - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - /** - * This method uses provided OutputAdapter to return custom object built from INDArray - * - * PLEASE NOTE: This method uses dedicated Workspace for output generation to avoid redundant allocations - * - * @param inputs Input arrays to the netwonk - * @param inputMasks Optional input mask arrays (may be null) - * @param labelMasks Optional label mask arrays (may be null - * @param outputAdapter OutputAdapter instance - * @param T extends Object - * @return T instance produced by OutputAdapter - */ - public synchronized T output(@NonNull INDArray inputs, INDArray inputMasks, INDArray labelMasks, @NonNull OutputAdapter outputAdapter) { - try (val ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM)) { - if (outputAdapter instanceof ModelAdapter) - return ((ModelAdapter) outputAdapter).apply(this, new INDArray[]{inputs}, new INDArray[]{ inputMasks}, new INDArray[]{labelMasks}); - else - return outputAdapter.apply(output(inputs, false, inputMasks, labelMasks, ws)); - } - } - - /** - * Perform inference on the provided input/features - i.e., perform forward pass using the provided input/features - * and return the output of the final layer. Equivalent to {@link #output(INDArray, boolean)} with train=false - i.e., - * this method is used for inference. - * - * @param input Input to the network - * @return The network predictions - i.e., the activations of the final layer - */ - public INDArray output(INDArray input) { - return output(input, TrainingMode.TEST); - } - - /** - * Generate the output for all examples/batches in the input iterator, and concatenate them into a single array. - * See {@link #output(INDArray)}
- * NOTE 1: The output array can require a considerable amount of memory for iterators with a large number of examples
- * NOTE 2: This method cannot be used for variable length time series outputs, as this would require padding arrays - * for some outputs, or returning a mask array (which cannot be done with this method). For variable length time - * series applications, use one of the other output methods. This method also cannot be used with fully convolutional - * networks with different output sizes (for example, segmentation on different input image sizes). - * - * - * @param iterator Data to pass through the network - * @return output for all examples in the iterator, concatenated into a - */ - public INDArray output(DataSetIterator iterator, boolean train) { - List outList = new ArrayList<>(); - long[] firstOutputShape = null; - while (iterator.hasNext()) { - DataSet next = iterator.next(); - INDArray features = next.getFeatures(); - - if (features == null) - continue; - - INDArray fMask = next.getFeaturesMaskArray(); - INDArray lMask = next.getLabelsMaskArray(); - INDArray output = this.output(features, train, fMask, lMask); - outList.add(output); - if(firstOutputShape == null){ - firstOutputShape = output.shape(); + currPair = ((RecurrentLayer) layers[i]).tbpttBackpropGradient(currPair.getSecond(), + layerWiseConfigurations.getTbpttBackLength(), workspaceMgr); } else { - //Validate that shapes are the same (may not be, for some RNN variable length time series applications) - long[] currShape = output.shape(); - Preconditions.checkState(firstOutputShape.length == currShape.length, "Error during forward pass:" + - "different minibatches have different output array ranks - first minibatch shape %s, last minibatch shape %s", firstOutputShape, currShape); - for( int i=1; i - * This is equivalent to {@link #score(DataSet, boolean)} with training==false. - * @param data the data to score - * @return the score for the given input,label pairs - * @see #score(DataSet, boolean) - */ - public double score(DataSet data) { - return score(data, false); - } - - /** - * Sets the input and labels and calculates the score (value of the output layer loss function plus l1/l2 if applicable) - * for the prediction with respect to the true labels
- * @param data data to calculate score for - * @param training If true: score during training. If false: score at test time. This can affect the application of - * certain features, such as dropout and dropconnect (which are applied at training time only) - * @return the score (value of the loss function) - */ - public double score(DataSet data, boolean training) { - try{ - return scoreHelper(data, training); - } catch (OutOfMemoryError e){ - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - private double scoreHelper(DataSet data, boolean training){ - boolean hasMaskArray = data.hasMaskArrays(); - if (hasMaskArray) - setLayerMaskArrays(data.getFeaturesMaskArray(), data.getLabelsMaskArray()); - - if (!(getOutputLayer() instanceof IOutputLayer)) { - throw new IllegalStateException("Cannot calculate score if final layer is not an instance of IOutputLayer. " + - "Final layer is of type: " + getOutputLayer().getClass()); - } - - WorkspaceMode wsm = (training ? layerWiseConfigurations.getTrainingWorkspaceMode() : layerWiseConfigurations.getInferenceWorkspaceMode()); - LayerWorkspaceMgr mgr; - if(wsm == WorkspaceMode.NONE){ - mgr = LayerWorkspaceMgr.noWorkspaces(); - } else { - mgr = LayerWorkspaceMgr.builder() - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - //TODO we can probably optimize this - .noWorkspaceFor(ArrayType.ACTIVATIONS) - .noWorkspaceFor(ArrayType.INPUT) - .build(); - } - mgr.setHelperWorkspacePointers(helperWorkspaces); - - INDArray inputToOutputLayer = outputOfLayerDetached(training, FwdPassType.STANDARD,layers.length-2, data.getFeatures(), - data.getFeaturesMaskArray(), data.getLabelsMaskArray(), null); - - if (data.getFeatures().size(0) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - IOutputLayer ol = (IOutputLayer) getOutputLayer(); - if (getLayerWiseConfigurations().getInputPreProcess(layers.length - 1) != null) { - inputToOutputLayer = getLayerWiseConfigurations().getInputPreProcess(layers.length - 1) - .preProcess(inputToOutputLayer, (int) data.getFeatures().size(0), mgr); - } - ol.setInput(inputToOutputLayer, mgr); //Feedforward doesn't include output layer for efficiency - ol.setLabels(data.getLabels()); - double score; - try(MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { - score = ol.computeScore(calcRegularizationScore(true), training, mgr); - } - - if (hasMaskArray) - clearLayerMaskArrays(); - clearLayersStates(); - - return score; - } - - /** - * As per {@link #scoreExamples(DataSet, boolean)} - the outputs (example scores) for all DataSets in the iterator are concatenated - */ - public INDArray scoreExamples(DataSetIterator iter, boolean addRegularizationTerms) { - List out = new ArrayList<>(); - - while (iter.hasNext()) { - out.add(scoreExamples(iter.next(), addRegularizationTerms)); - } - return Nd4j.toFlattened('f', out); - } - - /**Calculate the score for each example in a DataSet individually. Unlike {@link #score(DataSet)} and {@link #score(DataSet, boolean)} - * this method does not average/sum over examples. This method allows for examples to be scored individually (at test time only), which - * may be useful for example for autoencoder architectures and the like.
- * Each row of the output (assuming addRegularizationTerms == true) is equivalent to calling score(DataSet) with a single example. - * @param data The data to score - * @param addRegularizationTerms If true: add l1/l2 regularization terms (if any) to the score. If false: don't add regularization terms - * @return An INDArray (column vector) of size input.numRows(); the ith entry is the score (loss value) of the ith example - */ - public INDArray scoreExamples(DataSet data, boolean addRegularizationTerms) { - try{ - return scoreExamplesHelper(data, addRegularizationTerms); - } catch (OutOfMemoryError e){ - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - private INDArray scoreExamplesHelper(DataSet data, boolean addRegularizationTerms){ - INDArray inputLast = outputOfLayerDetached(false, FwdPassType.STANDARD,layers.length-2, data.getFeatures(), - data.getFeaturesMaskArray(), data.getLabelsMaskArray(), null); - setLabels(data.getLabels()); - setLayerMaskArrays(data.getFeaturesMaskArray(), data.getLabelsMaskArray()); - - //TODO we might want workspaces here? - LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces(); - - INDArray out; - if (getOutputLayer() instanceof IOutputLayer) { - IOutputLayer ol = (IOutputLayer) getOutputLayer(); - if(layerWiseConfigurations.getInputPreProcess(layers.length-1) != null){ - - if (data.getFeatures().size(0) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - inputLast = layerWiseConfigurations.getInputPreProcess(layers.length-1).preProcess(inputLast, - (int) data.getFeatures().size(0), mgr); + for (Map.Entry entry : currPair.getFirst().gradientForVariable() + .entrySet()) { + String origName = entry.getKey(); + multiGradientKey = i + "_" + origName; + gradientList.addLast(new Triple<>(multiGradientKey, entry.getValue(), + currPair.getFirst().flatteningOrderForVariable(origName))); + } + if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { + currPair = new Pair<>(currPair.getFirst(), + this.layerWiseConfigurations.getInputPreProcess(i) + .backprop(currPair.getSecond(), getInputMiniBatchSize(), workspaceMgr)); + if (i > 0 && currPair.getSecond() != null) { + validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, + i, + true, "Backprop"); } - ol.setLabels(data.getLabels()); - ol.setInput(inputLast, mgr); - double r = (addRegularizationTerms ? 
calcRegularizationScore(true) : 0); - out = ol.computeScoreForExamples(r, mgr); - } else { - throw new UnsupportedOperationException( - "Cannot calculate score with respect to labels without an OutputLayer"); - } + } - clearLayersStates(); - clearLayerMaskArrays(); - return out; - } - - - @Override - public void fit() { - fit(input, labels); - } - - @Override - public void update(INDArray gradient, String paramType) { - throw new UnsupportedOperationException("Not implemented"); - } - - - /** - * Score of the model (relative to the objective function) - previously calculated on the last minibatch - * - * @return the score of the model (relative to the objective function) - */ - @Override - public double score() { - return score; - } - - /** - * Intended for developer/internal use - */ - public void setScore(double score) { - this.score = score; - } - - @Override - public void computeGradientAndScore(LayerWorkspaceMgr layerWorkspaceMgr){ - computeGradientAndScore(); - } - - public void computeGradientAndScore() { - - if (!(getOutputLayer() instanceof IOutputLayer)) { - throw new DL4JException( - "Cannot calculate gradient and score with respect to labels: final layer is not an IOutputLayer. " + - "Final layer class: " + getOutputLayer().getClass() + ". To calculate gradients and fit a network " + - "using backpropagation, the final layer must be an output layer"); - } - - //Note: Workspace manager is only ose here for score calculation... other workspace managers are used in the - // various FF/backprop methds - LayerWorkspaceMgr mgr; - if(layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE){ - mgr = LayerWorkspaceMgr.noWorkspaces(); - } else { - mgr = LayerWorkspaceMgr.builder() - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); - - if(layerWiseConfigurations.getCacheMode() != null){ - //For now: store cache mode activations in activations workspace - mgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); + if (i == 0) { + if (returnInputActGrad && currPair.getSecond() != null) { + currPair.setSecond(currPair.getSecond().detach()); + } else { + currPair.setSecond(null); } + } + + if (wsActGradCloseNext != null) { + wsActGradCloseNext.close(); + } + wsActGradCloseNext = wsActGradTemp; + wsActGradTemp = null; } - boolean tbptt = layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT; - FwdPassType fwdType = (tbptt ? 
FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE : FwdPassType.STANDARD); - synchronizeIterEpochCounts(); - - //Calculate activations (which are stored in each layer, and used in backprop) - try(MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS)) { - //First: do a feed-forward through the network - //Note that we don't actually need to do the full forward pass through the output layer right now; but we do - // need the input to the output layer to be set (such that backprop can be done) - List activations = ffToLayerActivationsInWs(layers.length - 2, fwdType, tbptt, input, mask, null); - if (!trainingListeners.isEmpty()) { - //TODO: We possibly do want output layer activations in some cases here... - for (TrainingListener tl : trainingListeners) { - tl.onForwardPass(this, activations); - } - } - INDArray inputToOutputLayer = activations.get(activations.size() - 1); - if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null) { - inputToOutputLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1) - .preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr); - //Validate activations location - } - getOutputLayer().setInput(inputToOutputLayer, mgr); - //Then: compute gradients - Pair pair = calcBackpropGradients(null, true, false, false); - this.gradient = (pair == null ? null : pair.getFirst()); - - //Calculate score - try(MemoryWorkspace wsFF = mgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { - double r = calcRegularizationScore(true); - score = ((IOutputLayer) getOutputLayer()).computeScore(r, true, mgr); - } - - //Listeners - if (!trainingListeners.isEmpty()) { - try (MemoryWorkspace workspace = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - for (TrainingListener tl : trainingListeners) { - tl.onBackwardPass(this); - } - } - } + if (traceLog) { + log.trace("Completed backprop: {} - {}", i, layers[i].getClass().getSimpleName()); } - - //Clear the post noise/dropconnect parameters on the output layer - getOutputLayer().clearNoiseWeightParams(); - } - - /** - * Clear the inputs. Clears optimizer state. 
- */ - public void clear() { - for (Layer layer : layers) - layer.clear(); - - input = null; - labels = null; - solver = null; - } - - @Override - public void applyConstraints(int iteration, int epoch) { - for(Layer l : layers){ - l.applyConstraints(iteration, epoch); - } - } - - - /** - * Set the input array for the network - * - * @param input Input array to set - */ - public void setInput(INDArray input) { - this.input = input; - if (this.layers == null) { - init(); - } - if (input != null) { - if (input.length() == 0) - throw new IllegalArgumentException( - "Invalid input: length 0 (shape: " + Arrays.toString(input.shape()) + ")"); - - if (input.size(0) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - setInputMiniBatchSize((int) input.size(0)); - } - } - - @Override - public void setInput(INDArray input, LayerWorkspaceMgr mgr){ - throw new UnsupportedOperationException("Not supported"); - } - - /** - * Get the output layer - i.e., the last layer in the netwok - * - * @return - */ - public Layer getOutputLayer() { - Layer ret = getLayers()[getLayers().length - 1]; - if (ret instanceof FrozenLayerWithBackprop) { - ret = ((FrozenLayerWithBackprop) ret).getInsideLayer(); - } - return ret; - } - - - /** - * See {@link #setParams(INDArray)} - */ - public void setParameters(INDArray params) { - setParams(params); - } - - /** - * Intended for internal/developer use - */ - public NeuralNetConfiguration getDefaultConfiguration() { - return defaultConfiguration; - } - - public INDArray getLabels() { - return labels; - } - - public INDArray getInput() { - return input; - } - - - /** - * @param labels Labels to set - */ - public void setLabels(INDArray labels) { - this.labels = labels; - } - - /** - * Get the number of layers in the network - * - * @return the number of layers in the network - */ - public int getnLayers() { - return layerWiseConfigurations.getConfs().size(); - } - - /** - * @return The layers in the network - */ - public synchronized Layer[] getLayers() { - return layers; - } - - public Layer getLayer(int i) { - Preconditions.checkArgument(i >= 0 && i < layers.length, "Invalid layer index: layer index must be 0" + - " to %s (inclusive), got index %s", layers.length-1, i); - return layers[i]; - } - - public Layer getLayer(String name) { - return layerMap.get(name); - } - - public List getLayerNames() { - return new ArrayList<>(layerMap.keySet()); - } - - public void setLayers(Layer[] layers) { - this.layers = layers; - } - - public INDArray getMask() { - return mask; - } - - public void setMask(INDArray mask) { - this.mask = mask; - } - - public INDArray getMaskArray() { - return mask; - } - - @Override - public boolean isPretrainLayer() { - return false; - } - - @Override - public void clearNoiseWeightParams() { - for(Layer l : layers){ - l.clearNoiseWeightParams(); - } - } - - @Override - public void allowInputModification(boolean allow) { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, - int minibatchSize) { - if (maskArray == null) { - for (int i = 0; i < layers.length; i++) { - layers[i].feedForwardMaskArray(null, null, minibatchSize); - } - } else { - //Do a forward pass through each preprocessor and layer - for (int i = 0; i < layers.length; i++) { - InputPreProcessor preProcessor = getLayerWiseConfigurations().getInputPreProcess(i); - - if (preProcessor != null) { - Pair p = - preProcessor.feedForwardMaskArray(maskArray, currentMaskState, 
minibatchSize); - if (p != null) { - maskArray = p.getFirst(); - currentMaskState = p.getSecond(); - } else { - maskArray = null; - currentMaskState = null; - } - } - - Pair p = - layers[i].feedForwardMaskArray(maskArray, currentMaskState, minibatchSize); - if (p != null) { - maskArray = p.getFirst(); - currentMaskState = p.getSecond(); - } else { - maskArray = null; - currentMaskState = null; - } - } - } - - return new Pair<>(maskArray, currentMaskState); - } - - @Override - public LayerHelper getHelper() { - throw new UnsupportedOperationException("Not supported"); - } - - //========== - //Layer methods - - @Override - public Type type() { - return Type.MULTILAYER; - } - - - /** - * Equivalent to {@link #output(INDArray)} using the input set via {@link #setInput(INDArray)} - */ - public INDArray activate(TrainingMode training) { - return output(input, training == TrainingMode.TRAIN); - } - - /** - * Equivalent to {@link #output(INDArray, TrainingMode)} - */ - public INDArray activate(INDArray input, TrainingMode training) { - return output(input, training == TrainingMode.TRAIN); - } - - @Override - public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { - if (getOutputLayer() instanceof IOutputLayer) - throw new UnsupportedOperationException("Cannot calculate gradients based on epsilon with OutputLayer"); - - return calcBackpropGradients(epsilon, false, false, true); - } - - @Override - public void setIndex(int index) { - layerIndex = index; - } - - @Override - public int getIndex() { - return layerIndex; - } - - @Override - public int getIterationCount() { - return getLayerWiseConfigurations().getIterationCount(); - } - - @Override - public int getEpochCount() { - return getLayerWiseConfigurations().getEpochCount(); - } - - @Override - public void setIterationCount(int iterationCount) { - getLayerWiseConfigurations().setIterationCount(iterationCount); - } - - @Override - public void setEpochCount(int epochCount) { - getLayerWiseConfigurations().setEpochCount(epochCount); - } - - @Override - public double calcRegularizationScore(boolean backpropParamsOnly){ - double scoreSum = 0.0; - for (int i = 0; i < layers.length; i++) { - scoreSum += layers[i].calcRegularizationScore(backpropParamsOnly); - } - return scoreSum; - } - - @Override - public void update(Gradient gradient) { - if (gradient.gradient().length() != numParams(true)) - throw new IllegalArgumentException("Invalid input: expect gradients array of length " + numParams(true)); - for (Map.Entry entry : gradient.gradientForVariable().entrySet()) { - String key = entry.getKey(); - INDArray val = entry.getValue(); - int idx = key.indexOf('_'); - if (idx == -1) - throw new IllegalStateException("Invalid param key: not have layer separator: \"" + key + "\""); - Integer layerId = Integer.parseInt(key.substring(0, idx)); - String paramType = key.substring(idx + 1); - // Update MLN gradient - this.gradient.gradientForVariable().put(key, val); - // Update layer params - layers[layerId].update(val, paramType); - } - // Update layerwise gradient view - setBackpropGradientsViewArray(gradient.gradient()); - - } - - @Override - public INDArray activate(boolean training, LayerWorkspaceMgr mgr) { - throw new UnsupportedOperationException(); - } - - @Override - public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr mgr) { - throw new UnsupportedOperationException(); - } - - @Override - public void setInputMiniBatchSize(int size) { - if (layers != null) - for (Layer l : layers) - 
l.setInputMiniBatchSize(size); - } - - @Override - public int getInputMiniBatchSize() { - if(!conf().isMiniBatch()) - return 1; - - if (input.size(0) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - return (int) input.size(0); - } - - @Override - public void setMaskArray(INDArray maskArray) { - throw new UnsupportedOperationException(); - } - - /** - * - * If this MultiLayerNetwork contains one or more RNN layers: conduct forward pass (prediction) - * but using previous stored state for any RNN layers. The activations for the final step are - * also stored in the RNN layers for use next time rnnTimeStep() is called.
- * This method can be used to generate output one or more steps at a time instead of always having to do - * forward pass from t=0. Example uses are for streaming data, and for generating samples from network output - * one step at a time (where samples are then fed back into the network as input)
- * If no previous state is present in RNN layers (i.e., initially or after calling rnnClearPreviousState()), - * the default initialization (usually 0) is used.
- * Supports mini-batch (i.e., multiple predictions/forward pass in parallel) as well as for single examples.
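For illustration, a minimal streaming sketch of this rnnTimeStep usage pattern (a sketch only: the variable net, the 1x10 input shape and the use of Nd4j.rand are hypothetical choices for the example):

    net.rnnClearPreviousState();                        // start from the default (zero) RNN state
    INDArray step1 = Nd4j.rand(DataType.FLOAT, 1, 10);  // one time step, shape [miniBatchSize, nIn]
    INDArray step2 = Nd4j.rand(DataType.FLOAT, 1, 10);
    INDArray out1 = net.rnnTimeStep(step1);             // the updated state is stored inside the RNN layers
    INDArray out2 = net.rnnTimeStep(step2);             // continues from the state left by the previous call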
- * @param input Input to network. May be for one or multiple time steps. For single time step: - * input has shape [miniBatchSize,inputSize] or [miniBatchSize,inputSize,1]. miniBatchSize=1 for single example.
- * For multiple time steps: [miniBatchSize,inputSize,inputTimeSeriesLength] - * @return Output activations. If output is RNN layer (such as RnnOutputLayer): if input has shape [miniBatchSize,inputSize] - * i.e., is 2d, output has shape [miniBatchSize,outputSize] (i.e., also 2d).
- * Otherwise output is 3d [miniBatchSize,outputSize,inputTimeSeriesLength] when using RnnOutputLayer. - * @see #rnnTimeStep(INDArray, MemoryWorkspace) For outputting the activations in the specified workspace - */ - public INDArray rnnTimeStep(INDArray input) { - return rnnTimeStep(input, null); - } - - /** - * See {@link #rnnTimeStep(INDArray)} for details
- * If no memory workspace is provided, the output will be detached (not in any workspace).
- * If a memory workspace is provided, the output activation array (i.e., the INDArray returned by this method) - * will be placed in the specified workspace. This workspace must be opened by the user before calling this method - - * and the user is responsible for (a) closing this workspace, and (b) ensuring the output array is not used out - * of scope (i.e., not used after closing the workspace to which it belongs - as this is likely to cause either - * an exception when used, or a crash). - * - * @param input Input activations - * @param outputWorkspace Output workspace. May be null - * @return The output/activations from the network (either detached or in the specified workspace if provided) - */ - public INDArray rnnTimeStep(INDArray input, MemoryWorkspace outputWorkspace ) { + } + } catch (Throwable thr) { + t = thr; + } finally { + if (wsActGradCloseNext != null) { try { - boolean inputIs2d = input.rank() == 2; - INDArray out = outputOfLayerDetached(false, FwdPassType.RNN_TIMESTEP, layers.length - 1, input, null, null, outputWorkspace); - if (inputIs2d && out.rank() == 3 && layers[layers.length - 1].type() == Type.RECURRENT) { - //Return 2d output with shape [miniBatchSize,nOut] - // instead of 3d output with shape [miniBatchSize,nOut,1] - return out.tensorAlongDimension(0, 1, 0); - } - return out; - } catch (OutOfMemoryError e){ - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; + wsActGradCloseNext.close(); + } catch (Throwable t2) { + if (t != null) { + log.error( + "Encountered second exception while trying to close workspace after initial exception"); + log.error("Original exception:", t); + throw t2; + } } - } - - /**Get the state of the RNN layer, as used in rnnTimeStep(). - * @param layer Number/index of the layer. - * @return Hidden state, or null if layer is not an RNN layer - */ - public Map rnnGetPreviousState(int layer) { - if (layer < 0 || layer >= layers.length) - throw new IllegalArgumentException("Invalid layer number"); - Layer l = layers[layer]; - if(l instanceof org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer){ - l = ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer)l).getUnderlying(); + } + if (wsActGradTemp != null) { + //Should only be non-null on exception + try { + wsActGradTemp.close(); + } catch (Throwable t2) { + if (t != null) { + log.error( + "Encountered second exception while trying to close workspace after initial exception"); + log.error("Original exception:", t); + throw t2; + } } - if (!(l instanceof RecurrentLayer)) - throw new IllegalArgumentException("Layer is not an RNN layer"); - return ((RecurrentLayer) l).rnnGetPreviousState(); - } + } + Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace); - /**Set the state of the RNN layer. - * @param layer The number/index of the layer. 
- * @param state The state to set the specified layer to - */ - public void rnnSetPreviousState(int layer, Map state) { - if (layer < 0 || layer >= layers.length) - throw new IllegalArgumentException("Invalid layer number"); - Layer l = layers[layer]; - if(l instanceof org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer){ - l = ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer)l).getUnderlying(); + if (t != null) { + if (t instanceof RuntimeException) { + throw ((RuntimeException) t); } - if (!(l instanceof RecurrentLayer)) - throw new IllegalArgumentException("Layer is not an RNN layer"); - RecurrentLayer r = (RecurrentLayer) l; - r.rnnSetPreviousState(state); + throw new RuntimeException("Error during neural network forward pass", t); + } } - /** Clear the previous state of the RNN layers (if any). - */ - public void rnnClearPreviousState() { - if (layers == null) - return; - for (int i = 0; i < layers.length; i++) { - if (layers[i] instanceof RecurrentLayer) - ((RecurrentLayer) layers[i]).rnnClearPreviousState(); - else if (layers[i] instanceof MultiLayerNetwork) { - ((MultiLayerNetwork) layers[i]).rnnClearPreviousState(); - } else if(layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer)layers[i]).getUnderlying() instanceof RecurrentLayer){ - ((RecurrentLayer) ((BaseWrapperLayer)layers[i]).getUnderlying()).rnnClearPreviousState(); - } + if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + WorkspaceUtils.assertNoWorkspacesOpen( + "Expected no workspace active in calcBackpropGradients when " + + "training workspace is set to none"); + } else { + if (epsilon == null) { + //If epsilon != null: external errors use case (inputs are detached instead) + WorkspaceUtils.assertOpenActiveAndCurrent(WS_ALL_LAYERS_ACT, + "calcBackpropGradients: WS_ALL_LAYERS_ACT is no" + + " longer the currently open/active workspace"); + } + } + + //Add gradients to Gradients (map), in correct order + for (Triple triple : gradientList) { + gradient.setGradientFor(triple.getFirst(), triple.getSecond(), triple.getThird()); + } + + return new Pair<>(gradient, currPair.getSecond()); + } + + protected void doTruncatedBPTT(INDArray input, INDArray labels, INDArray featuresMaskArray, + INDArray labelsMaskArray, LayerWorkspaceMgr workspaceMgr) { + if (input.rank() != 3 || labels.rank() != 3) { + log.warn( + "Cannot do truncated BPTT with non-3d inputs or labels. 
Expect input with shape [miniBatchSize,nIn,timeSeriesLength], got " + + Arrays.toString(input.shape()) + "\tand labels with shape " + + Arrays.toString(labels.shape())); + return; + } + if (input.size(2) != labels.size(2)) { + log.warn( + "Input and label time series have different lengths: {} input length, {} label length", + input.size(2), labels.size(2)); + return; + } + + int fwdLen = layerWiseConfigurations.getTbpttFwdLength(); + update(TaskUtils.buildTask(input, labels)); + val timeSeriesLength = input.size(2); + long nSubsets = timeSeriesLength / fwdLen; + if (timeSeriesLength % fwdLen != 0) { + nSubsets++; //Example: 100 fwdLen with timeSeriesLength=120 -> want 2 subsets (1 of size 100, 1 of size 20) + } + + rnnClearPreviousState(); + + for (int i = 0; i < nSubsets; i++) { + long startTimeIdx = (long) i * fwdLen; + long endTimeIdx = startTimeIdx + fwdLen; + if (endTimeIdx > timeSeriesLength) { + endTimeIdx = timeSeriesLength; + } + + if (startTimeIdx > Integer.MAX_VALUE || endTimeIdx > Integer.MAX_VALUE) { + throw new ND4JArraySizeException(); + } + INDArray[] subsets = getSubsetsForTbptt((int) startTimeIdx, (int) endTimeIdx, input, labels, + featuresMaskArray, labelsMaskArray); + + setInput(subsets[0]); + setLabels(subsets[1]); + setLayerMaskArrays(subsets[2], subsets[3]); + + if (solver == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + .build(); } + } + solver.optimize(workspaceMgr); + + //Finally, update the state of the RNN layers: + updateRnnStateWithTBPTTState(); } - /** Similar to rnnTimeStep and feedForward() methods. Difference here is that this method:
- * (a) like rnnTimeStep does forward pass using stored state for RNN layers, and
- * (b) unlike rnnTimeStep does not modify the RNN layer state
- * Therefore multiple calls to this method with the same input should have the same output.
- * Typically used during training only. Use rnnTimeStep for prediction/forward pass at test time. - * @param input Input to network - * @param training Whether training or not - * @param storeLastForTBPTT set to true if used as part of truncated BPTT training - * @return Activations for each layer (including input, as per feedforward() etc) - */ - public List rnnActivateUsingStoredState(INDArray input, boolean training, boolean storeLastForTBPTT) { - return ffToLayerActivationsDetached(training, FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE, storeLastForTBPTT, layers.length-1, input, mask, null, false); + rnnClearPreviousState(); + clearLayerMaskArrays(); + } + + private INDArray[] getSubsetsForTbptt(int startTimeIdx, int endTimeIdx, INDArray input, + INDArray labels, + INDArray fMask, INDArray lMask) { + INDArray[] out = new INDArray[4]; + out[0] = input.get(NDArrayIndex.all(), NDArrayIndex.all(), + NDArrayIndex.interval(startTimeIdx, endTimeIdx)); + out[1] = labels.get(NDArrayIndex.all(), NDArrayIndex.all(), + NDArrayIndex.interval(startTimeIdx, endTimeIdx)); + + if (fMask != null) { + out[2] = fMask.get(NDArrayIndex.all(), + NDArrayIndex.interval(startTimeIdx, endTimeIdx)); + } + if (lMask != null) { + out[3] = lMask.get(NDArrayIndex.all(), + NDArrayIndex.interval(startTimeIdx, endTimeIdx)); } - /** Get the updater for this MultiLayerNetwork - * @return Updater for MultiLayerNetwork - */ - public Updater getUpdater() { - return getUpdater(true); + return out; + } + + /** + * Intended for internal/developer use + */ + public void updateRnnStateWithTBPTTState() { + for (int i = 0; i < layers.length; i++) { + if (layers[i] instanceof RecurrentLayer) { + RecurrentLayer l = ((RecurrentLayer) layers[i]); + l.rnnSetPreviousState(l.rnnGetTBPTTState()); + } else if (layers[i] instanceof MultiLayerNetwork) { + ((MultiLayerNetwork) layers[i]).updateRnnStateWithTBPTTState(); + } + } + } + + /** + * Get the {@link TrainingListener}s set for this network, if any + * + * @return listeners set for this network + */ + public Collection getListeners() { + return trainingListeners; + } + + @Override + public void setListeners(Collection listeners) { + if (layers == null) { + init(); + } + for (Layer layer : layers) { + layer.setListeners(listeners); } - public Updater getUpdater(boolean initializeIfReq) { - if (solver == null && initializeIfReq) { - synchronized(this){ - if(solver == null) { //May have been created while waiting for lock - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build(); - solver.getOptimizer().setUpdater(UpdaterCreator.getUpdater(this)); - } - } + if (solver != null) { + solver.setListeners(listeners); + } + + this.trainingListeners.clear(); + if (listeners != null) { + this.trainingListeners.addAll(listeners); + } + } + + @Override + public void setListeners(TrainingListener... listeners) { + Collection cListeners = new ArrayList<>(); + //Check: user might have done setListeners(null) thinking this would clear the current listeners. 
+ //This results in a TrainingListener[1] with a single null value -> results in an NPE later + if (listeners != null && listeners.length > 0) { + for (TrainingListener i : listeners) { + if (i != null) { + cListeners.add(i); } + } + } + setListeners(cListeners); + } + + /** + * @deprecated Use {@link #getListeners()} + */ + @Deprecated + public Collection getTrainingListeners() { + return trainingListeners; + } + + /** + * This method ADDS additional TrainingListener to existing listeners + * + * @param listeners Listeners to add + */ + @Override + public void addListeners(TrainingListener... listeners) { + Collections.addAll(trainingListeners, listeners); + + // fixme this is wrong, since it removes existing listeners from the solver + if (solver != null) { + solver.setListeners(this.trainingListeners); + } + } + + /** + * Usable only for classification networks in conjunction with OutputLayer. Cannot be used with + * RnnOutputLayer, CnnLossLayer, or networks used for regression.
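A minimal sketch of how the listener setters above are typically used (net is an assumed, already-initialized MultiLayerNetwork; ScoreIterationListener is the stock listener from org.deeplearning4j.optimize.listeners):

    net.setListeners(new ScoreIterationListener(10)); // replaces existing listeners; logs the score every 10 iterations
    net.addListeners(new ScoreIterationListener(1));  // ADDS a further listener without clearing the one set above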
To get the raw output + * activations of the output layer, use {@link #output(INDArray)} or similar.
+ *
+ * Equivalent to argmax(this.output(input)): Returns the predicted class indices corresponding to + * the predictions for each example in the features array. + * + * @param d The input features to perform inference on + * @return The predicted class index for each example + */ + @Override + public int[] predict(INDArray d) { + INDArray output = output(d, Layer.TrainingMode.TEST); + + if (d.size(0) > Integer.MAX_VALUE) { + throw new ND4JArraySizeException(); } - /** Set the updater for the MultiLayerNetwork */ - public void setUpdater(Updater updater) { - if (solver == null) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build(); - } - solver.getOptimizer().setUpdater(updater); + Preconditions.checkState(output.rank() == 2, + "predict(INDArray) method can only be used on rank 2 output - got array with rank %s", + output.rank()); + return output.argMax(1).toIntVector(); + } + + /** + * As per {@link #predict(INDArray)} but the returned values are looked up from the list of label + * names in the provided DataSet + */ + @Override + public List predict(org.nd4j.linalg.dataset.api.DataSet dataSet) { + Preconditions.checkState(dataSet.getLabelNamesList() != null, + "This method can only be used when the DataSet contains a label name list"); + int[] intRet = predict(dataSet.getFeatures()); + List ret = new ArrayList<>(); + for (int i = 0; i < intRet.length; i++) { + ret.add(i, dataSet.getLabelName(intRet[i])); + } + return ret; + } + + /** + * Fit the model for one iteration on the provided data + * + * @param data the examples to classify (one example in each row) + * @param labels the example labels(a binary outcome matrix) + */ + @Override + public void fit(INDArray data, INDArray labels) { + fit(data, labels, null, null); + } + + /** + * Fit the model for one iteration on the provided data + * + * @param features the examples to classify (one example in each row) + * @param labels the example labels(a binary outcome matrix) + * @param featuresMask The mask array for the features (used for variable length time series, + * etc). May be null. + * @param labelsMask The mask array for the labels (used for variable length time series, etc). + * May be null. + */ + public synchronized void fit(INDArray features, INDArray labels, INDArray featuresMask, + INDArray labelsMask) { + try { + fitHelper(features, labels, featuresMask, labelsMask); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; + } + } + + private void fitHelper(INDArray features, INDArray labels, INDArray featuresMask, + INDArray labelsMask) { + if (numParams() == 0) { + //No op: can't fit a network with 0 parameters + return; } - /**Set the mask arrays for features and labels. Mask arrays are typically used in situations such as one-to-many - * and many-to-one learning with recurrent neural networks, as well as for supporting time series of varying lengths - * within the same minibatch.
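As an illustration of the masked fit overload added above, a short sketch (net, features, labels and the two masks are assumed, with shapes [mb, nIn, T], [mb, nOut, T] and [mb, T]; mask entries are 1 for real time steps and 0 for padding):

    net.fit(features, labels, featuresMask, labelsMask); // one fit iteration on a variable-length minibatch
    net.fit(features, labels);                           // equivalent call when no masking is needed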
- * For example, with RNN data sets with input of shape [miniBatchSize,nIn,timeSeriesLength] and outputs of shape - * [miniBatchSize,nOut,timeSeriesLength], the features and mask arrays will have shape [miniBatchSize,timeSeriesLength] - * and contain values 0 or 1 at each element (to specify whether a given input/example is present - or merely padding - - * at a given time step).
- * NOTE: This method is not usually used directly. Instead, methods such as {@link #feedForward(INDArray, INDArray, INDArray)} - * and {@link #output(INDArray, boolean, INDArray, INDArray)} handle setting of masking internally. - * @param featuresMaskArray Mask array for features (input) - * @param labelsMaskArray Mask array for labels (output) - * @see #clearLayerMaskArrays() - */ - public void setLayerMaskArrays(INDArray featuresMaskArray, INDArray labelsMaskArray) { - if (featuresMaskArray != null) { + setInput(features); + setLabels(labels); + this.setLayerMaskArrays(featuresMask, labelsMask); + update(TaskUtils.buildTask(features, labels)); - if (featuresMaskArray.size(0) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - //New approach: use feedForwardMaskArray method - feedForwardMaskArray(featuresMaskArray, MaskState.Active, (int) featuresMaskArray.size(0)); + LayerWorkspaceMgr workspaceMgr; + if (layerWiseConfigurations.getTrainingWorkspaceMode() == null) { + workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); + } else { + workspaceMgr = LayerWorkspaceMgr.builder() + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + //Note for updater working memory, we have the option to re-use WS_ALL_LAYERS_ACT or FF/BP_WORKING_MEM + // these should be closed by the time updaters are executed + //Generally, WS_ALL_LAYERS_ACT will be the larger of the two, so we'll use this + .with(ArrayType.UPDATER_WORKING_MEM, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .build(); + } + workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); + + if (layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT) { + doTruncatedBPTT(features, labels, featuresMask, labelsMask, workspaceMgr); + } else { + if (solver == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + .build(); + } + } + //TODO CACHE WORKSPACE, IF USED??? + solver.optimize(workspaceMgr); + } + + clearLayerMaskArrays(); + clearLayersStates(); + synchronizeIterEpochCounts(); + } + + @Override + public void fit(INDArray data, LayerWorkspaceMgr workspaceMgr) { + throw new UnsupportedOperationException("Not supported: use pretrainLayer"); + } + + /** + * Fit the model for one iteration on the provided data + * + * @param data the data to train on + */ + @Override + public void fit(org.nd4j.linalg.dataset.api.DataSet data) { + fit(data.getFeatures(), data.getLabels(), data.getFeaturesMaskArray(), + data.getLabelsMaskArray()); + } + + /** + * Fit the model for one iteration on the provided data + * + * @param examples the examples to classify (one example in each row) + * @param labels the labels for each example (the number of labels must match + */ + @Override + public void fit(INDArray examples, int[] labels) { + org.deeplearning4j.nn.conf.layers.OutputLayer layerConf = + (org.deeplearning4j.nn.conf.layers.OutputLayer) getOutputLayer().conf().getLayer(); + + if (layerConf.getNOut() > Integer.MAX_VALUE) { + throw new ND4JArraySizeException(); + } + fit(examples, FeatureUtil.toOutcomeMatrix(labels, (int) layerConf.getNOut())); + } + + /** + * Perform inference on the provided input/features - i.e., perform forward pass using the + * provided input/features and return the output of the final layer. + * + * @param input Input to the network + * @param train whether the output is test or train. 
This mainly affects hyper parameters such as + * dropout and batch normalization, which have different behaviour for test vs. + * train + * @return The network predictions - i.e., the activations of the final layer + */ + public INDArray output(INDArray input, TrainingMode train) { + return output(input, train == TrainingMode.TRAIN); + } + + /** + * Perform inference on the provided input/features - i.e., perform forward pass using the + * provided input/features and return the output of the final layer. + * + * @param input Input to the network + * @param train whether the output is test or train. This mainly affects hyper parameters such as + * dropout and batch normalization, which have different behaviour for test vs. + * train + * @return The network predictions - i.e., the activations of the final layer + */ + public INDArray output(INDArray input, boolean train) { + return output(input, train, null, null); + } + + /** + * Calculate the output of the network, with masking arrays. The masking arrays are used in + * situations such as one-to-many and many-to-one recurrent neural network (RNN) designs, as well + * as for supporting time series of varying lengths within the same minibatch. + */ + public INDArray output(INDArray input, boolean train, INDArray featuresMask, + INDArray labelsMask) { + return output(input, train, featuresMask, labelsMask, null); + } + + /** + * Get the network output, which is optionally placed in the specified memory workspace.
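A short sketch contrasting the two modes of the train flag described above (net and features are assumed):

    INDArray trainOut = net.output(features, true);  // training-mode pass: dropout/batch norm behave as during training
    INDArray testOut  = net.output(features, false); // inference-mode pass, same result as output(features)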
If no + * memory workspace is provided, the output will be detached (not in any workspace).
If a + * memory workspace is provided, the output activation array (i.e., the INDArray returned by this + * method) will be placed in the specified workspace. This workspace must be opened by the user + * before calling this method - and the user is responsible for (a) closing this workspace, and + * (b) ensuring the output array is not used out of scope (i.e., not used after closing the + * workspace to which it belongs - as this is likely to cause either an exception when used, or a + * crash). + * + * @param input Input to the network + * @param train True for train, false otherwise + * @param outputWorkspace May be null. If not null: the workspace MUST be opened before calling + * this method. + * @return The output/activations from the network (either detached or in the specified workspace + * if provided) + */ + public INDArray output(INDArray input, boolean train, MemoryWorkspace outputWorkspace) { + return output(input, train, null, null, outputWorkspace); + } + + /** + * Get the network output, which is optionally placed in the specified memory workspace.
If no + * memory workspace is provided, the output will be detached (not in any workspace).
If a + * memory workspace is provided, the output activation array (i.e., the INDArray returned by this + * method) will be placed in the specified workspace. This workspace must be opened by the user + * before calling this method - and the user is responsible for (a) closing this workspace, and + * (b) ensuring the output array is not used out of scope (i.e., not used after closing the + * workspace to which it belongs - as this is likely to cause either an exception when used, or a + * crash). + * + * @param input Input to the network + * @param train True for train, false otherwise + * @param outputWorkspace May be null. If not null: the workspace MUST be opened before calling + * this method. + * @return The output/activations from the network (either detached or in the specified workspace + * if provided) + */ + public synchronized INDArray output(INDArray input, boolean train, INDArray featuresMask, + INDArray labelsMask, MemoryWorkspace outputWorkspace) { + try { + return outputOfLayerDetached(train, FwdPassType.STANDARD, layers.length - 1, input, + featuresMask, labelsMask, outputWorkspace); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; + } + } + + /** + * This method uses provided OutputAdapter to return custom object built from INDArray + *
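A sketch of the workspace contract described above (the workspace id "OUTPUT_WS" and the variables net and features are assumptions for the example):

    try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace("OUTPUT_WS")) {
        INDArray out = net.output(features, false, ws); // out is placed in OUTPUT_WS, not detached
        // use out only inside this block; once the workspace is closed the array is no longer valid
    }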

+ * PLEASE NOTE: This method uses a dedicated Workspace for output generation to avoid redundant + * allocations + * + * @param inputs Input arrays to the network + * @param inputMasks Optional input mask arrays (may be null) + * @param labelMasks Optional label mask arrays (may be null) + * @param outputAdapter OutputAdapter instance + * @param T extends Object + * @return T instance produced by OutputAdapter + */ + public synchronized T output(@NonNull INDArray inputs, INDArray inputMasks, + INDArray labelMasks, @NonNull OutputAdapter outputAdapter) { + try (val ws = Nd4j.getWorkspaceManager() + .getAndActivateWorkspace(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM)) { + if (outputAdapter instanceof ModelAdapter) { + return ((ModelAdapter) outputAdapter).apply(this, new INDArray[]{inputs}, + new INDArray[]{inputMasks}, new INDArray[]{labelMasks}); + } else { + return outputAdapter.apply(output(inputs, false, inputMasks, labelMasks, ws)); + } + } + } + + /** + * Perform inference on the provided input/features - i.e., perform forward pass using the + * provided input/features and return the output of the final layer. Equivalent to + * {@link #output(INDArray, boolean)} with train=false - i.e., this method is used for inference. + * + * @param input Input to the network + * @return The network predictions - i.e., the activations of the final layer + */ + public INDArray output(INDArray input) { + return output(input, TrainingMode.TEST); + } + + /** + * Generate the output for all examples/batches in the input iterator, and concatenate them into a + * single array. See {@link #output(INDArray)}
NOTE 1: The output array can require a + * considerable amount of memory for iterators with a large number of examples
NOTE 2: This + * method cannot be used for variable length time series outputs, as this would require padding + * arrays for some outputs, or returning a mask array (which cannot be done with this method). For + * variable length time series applications, use one of the other output methods. This method also + * cannot be used with fully convolutional networks with different output sizes (for example, + * segmentation on different input image sizes). + * + * @param iterator Data to pass through the network + * @return output for all examples in the iterator, concatenated into a + */ + public INDArray output(DataSetIterator iterator, boolean train) { + List outList = new ArrayList<>(); + long[] firstOutputShape = null; + while (iterator.hasNext()) { + DataSet next = iterator.next(); + INDArray features = next.getFeatures(); + + if (features == null) { + continue; + } + + INDArray fMask = next.getFeaturesMaskArray(); + INDArray lMask = next.getLabelsMaskArray(); + INDArray output = this.output(features, train, fMask, lMask); + outList.add(output); + if (firstOutputShape == null) { + firstOutputShape = output.shape(); + } else { + //Validate that shapes are the same (may not be, for some RNN variable length time series applications) + long[] currShape = output.shape(); + Preconditions.checkState(firstOutputShape.length == currShape.length, + "Error during forward pass:" + + "different minibatches have different output array ranks - first minibatch shape %s, last minibatch shape %s", + firstOutputShape, currShape); + for (int i = 1; i < currShape.length; + i++) { //Skip checking minibatch dimension, fine if this varies + Preconditions.checkState(firstOutputShape[i] == currShape[i], + "Current output shape does not match first" + + " output array shape at position %s: all dimensions must match other than the first dimension.\n" + + + " For variable length output size/length use cases such as for RNNs with multiple sequence lengths," + + + " use one of the other (non iterator) output methods. First batch output shape: %s, current batch output shape: %s", + i, firstOutputShape, currShape); + } + } + } + return Nd4j.concat(0, outList.toArray(new INDArray[outList.size()])); + } + + /** + * Equivalent to {@link #output(DataSetIterator, boolean)} with train=false + */ + public INDArray output(DataSetIterator iterator) { + return output(iterator, false); + } + + /** + * Perform inference and then calculate the F1 score of the output(input) vs. the labels. + * + * @param input the input to perform inference with + * @param labels the true labels + * @return the score for the given input,label pairs + */ + @Override + public double f1Score(INDArray input, INDArray labels) { + feedForward(input); + setLabels(labels); + Evaluation eval = new Evaluation(); + eval.eval(labels, output(input)); + return eval.f1(); + } + + /** + * @deprecated Will be removed in a future release + */ + @Deprecated + @Override + public int numLabels() { + return (int) labels.size(1); + } + + /** + * Sets the input and labels and calculates the score (value of the output layer loss function + * plus l1/l2 if applicable) for the prediction with respect to the true labels
This is + * equivalent to {@link #score(DataSet, boolean)} with training==false. + * + * @param data the data to score + * @return the score for the given input,label pairs + * @see #score(DataSet, boolean) + */ + public double score(DataSet data) { + return score(data, false); + } + + /** + * Sets the input and labels and calculates the score (value of the output layer loss function + * plus l1/l2 if applicable) for the prediction with respect to the true labels
+ * + * @param data data to calculate score for + * @param training If true: score during training. If false: score at test time. This can affect + * the application of certain features, such as dropout and dropconnect (which are + * applied at training time only) + * @return the score (value of the loss function) + */ + public double score(DataSet data, boolean training) { + try { + return scoreHelper(data, training); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; + } + } + + private double scoreHelper(DataSet data, boolean training) { + boolean hasMaskArray = data.hasMaskArrays(); + if (hasMaskArray) { + setLayerMaskArrays(data.getFeaturesMaskArray(), data.getLabelsMaskArray()); + } + + if (!(getOutputLayer() instanceof IOutputLayer)) { + throw new IllegalStateException( + "Cannot calculate score if final layer is not an instance of IOutputLayer. " + + "Final layer is of type: " + getOutputLayer().getClass()); + } + + WorkspaceMode wsm = (training ? layerWiseConfigurations.getTrainingWorkspaceMode() + : layerWiseConfigurations.getInferenceWorkspaceMode()); + LayerWorkspaceMgr mgr; + if (wsm == WorkspaceMode.NONE) { + mgr = LayerWorkspaceMgr.noWorkspaces(); + } else { + mgr = LayerWorkspaceMgr.builder() + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + //TODO we can probably optimize this + .noWorkspaceFor(ArrayType.ACTIVATIONS) + .noWorkspaceFor(ArrayType.INPUT) + .build(); + } + mgr.setHelperWorkspacePointers(helperWorkspaces); + + INDArray inputToOutputLayer = outputOfLayerDetached(training, FwdPassType.STANDARD, + layers.length - 2, data.getFeatures(), + data.getFeaturesMaskArray(), data.getLabelsMaskArray(), null); + + if (data.getFeatures().size(0) > Integer.MAX_VALUE) { + throw new ND4JArraySizeException(); + } + IOutputLayer ol = (IOutputLayer) getOutputLayer(); + if (getLayerWiseConfigurations().getInputPreProcess(layers.length - 1) != null) { + inputToOutputLayer = getLayerWiseConfigurations().getInputPreProcess(layers.length - 1) + .preProcess(inputToOutputLayer, (int) data.getFeatures().size(0), mgr); + } + ol.setInput(inputToOutputLayer, mgr); //Feedforward doesn't include output layer for efficiency + ol.setLabels(data.getLabels()); + double score; + try (MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { + score = ol.computeScore(calcRegularizationScore(true), training, mgr); + } + + if (hasMaskArray) { + clearLayerMaskArrays(); + } + clearLayersStates(); + + return score; + } + + /** + * As per {@link #scoreExamples(DataSet, boolean)} - the outputs (example scores) for all DataSets + * in the iterator are concatenated + */ + public INDArray scoreExamples(DataSetIterator iter, boolean addRegularizationTerms) { + List out = new ArrayList<>(); + + while (iter.hasNext()) { + out.add(scoreExamples(iter.next(), addRegularizationTerms)); + } + return Nd4j.toFlattened('f', out); + } + + /** + * Calculate the score for each example in a DataSet individually. Unlike {@link #score(DataSet)} + * and {@link #score(DataSet, boolean)} this method does not average/sum over examples. This + * method allows for examples to be scored individually (at test time only), which may be useful + * for example for autoencoder architectures and the like.
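For illustration, a minimal per-example scoring sketch (net and a DataSet named data are assumed):

    INDArray exampleScores = net.scoreExamples(data, true); // column vector: one loss value per example, including l1/l2 terms
    double worst = exampleScores.maxNumber().doubleValue(); // e.g., locate the hardest example in the minibatch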
Each row of the output (assuming + * addRegularizationTerms == true) is equivalent to calling score(DataSet) with a single example. + * + * @param data The data to score + * @param addRegularizationTerms If true: add l1/l2 regularization terms (if any) to the score. If + * false: don't add regularization terms + * @return An INDArray (column vector) of size input.numRows(); the ith entry is the score (loss + * value) of the ith example + */ + public INDArray scoreExamples(DataSet data, boolean addRegularizationTerms) { + try { + return scoreExamplesHelper(data, addRegularizationTerms); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; + } + } + + private INDArray scoreExamplesHelper(DataSet data, boolean addRegularizationTerms) { + INDArray inputLast = outputOfLayerDetached(false, FwdPassType.STANDARD, layers.length - 2, + data.getFeatures(), + data.getFeaturesMaskArray(), data.getLabelsMaskArray(), null); + setLabels(data.getLabels()); + setLayerMaskArrays(data.getFeaturesMaskArray(), data.getLabelsMaskArray()); + + //TODO we might want workspaces here? + LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces(); + + INDArray out; + if (getOutputLayer() instanceof IOutputLayer) { + IOutputLayer ol = (IOutputLayer) getOutputLayer(); + if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null) { + + if (data.getFeatures().size(0) > Integer.MAX_VALUE) { + throw new ND4JArraySizeException(); + } + inputLast = layerWiseConfigurations.getInputPreProcess(layers.length - 1) + .preProcess(inputLast, + (int) data.getFeatures().size(0), mgr); + } + ol.setLabels(data.getLabels()); + ol.setInput(inputLast, mgr); + double r = (addRegularizationTerms ? calcRegularizationScore(true) : 0); + out = ol.computeScoreForExamples(r, mgr); + } else { + throw new UnsupportedOperationException( + "Cannot calculate score with respect to labels without an OutputLayer"); + } + + clearLayersStates(); + clearLayerMaskArrays(); + return out; + } + + @Override + public void fit() { + fit(input, labels); + } + + @Override + public void update(INDArray gradient, String paramType) { + throw new UnsupportedOperationException("Not implemented"); + } + + /** + * Score of the model (relative to the objective function) - previously calculated on the last + * minibatch + * + * @return the score of the model (relative to the objective function) + */ + @Override + public double score() { + return score; + } + + /** + * Intended for developer/internal use + */ + public void setScore(double score) { + this.score = score; + } + + @Override + public void computeGradientAndScore(LayerWorkspaceMgr layerWorkspaceMgr) { + computeGradientAndScore(); + } + + public void computeGradientAndScore() { + + if (!(getOutputLayer() instanceof IOutputLayer)) { + throw new DL4JException( + "Cannot calculate gradient and score with respect to labels: final layer is not an IOutputLayer. " + + + "Final layer class: " + getOutputLayer().getClass() + + ". To calculate gradients and fit a network " + + "using backpropagation, the final layer must be an output layer"); + } + + //Note: Workspace manager is only ose here for score calculation... 
other workspace managers are used in the + // various FF/backprop methds + LayerWorkspaceMgr mgr; + if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + mgr = LayerWorkspaceMgr.noWorkspaces(); + } else { + mgr = LayerWorkspaceMgr.builder() + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); + + if (layerWiseConfigurations.getCacheMode() != null) { + //For now: store cache mode activations in activations workspace + mgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); + } + } + + boolean tbptt = layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT; + FwdPassType fwdType = (tbptt ? FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE + : FwdPassType.STANDARD); + synchronizeIterEpochCounts(); + + //Calculate activations (which are stored in each layer, and used in backprop) + try (MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS)) { + //First: do a feed-forward through the network + //Note that we don't actually need to do the full forward pass through the output layer right now; but we do + // need the input to the output layer to be set (such that backprop can be done) + List activations = ffToLayerActivationsInWs(layers.length - 2, fwdType, tbptt, + input, mask, null); + if (!trainingListeners.isEmpty()) { + //TODO: We possibly do want output layer activations in some cases here... + for (TrainingListener tl : trainingListeners) { + tl.onForwardPass(this, activations); + } + } + INDArray inputToOutputLayer = activations.get(activations.size() - 1); + if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null) { + inputToOutputLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1) + .preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr); + //Validate activations location + } + getOutputLayer().setInput(inputToOutputLayer, mgr); + //Then: compute gradients + Pair pair = calcBackpropGradients(null, true, false, false); + this.gradient = (pair == null ? null : pair.getFirst()); + + //Calculate score + try (MemoryWorkspace wsFF = mgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { + double r = calcRegularizationScore(true); + score = ((IOutputLayer) getOutputLayer()).computeScore(r, true, mgr); + } + + //Listeners + if (!trainingListeners.isEmpty()) { + try (MemoryWorkspace workspace = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + for (TrainingListener tl : trainingListeners) { + tl.onBackwardPass(this); + } + } + } + } + + //Clear the post noise/dropconnect parameters on the output layer + getOutputLayer().clearNoiseWeightParams(); + } + + /** + * Clear the inputs. Clears optimizer state. 
+ */ + public void clear() { + for (Layer layer : layers) { + layer.clear(); + } + + input = null; + labels = null; + solver = null; + } + + @Override + public void applyConstraints(int iteration, int epoch) { + for (Layer l : layers) { + l.applyConstraints(iteration, epoch); + } + } + + @Override + public void setInput(INDArray input, LayerWorkspaceMgr mgr) { + throw new UnsupportedOperationException("Not supported"); + } + + /** + * Get the output layer - i.e., the last layer in the netwok + * + * @return + */ + public Layer getOutputLayer() { + Layer ret = getLayers()[getLayers().length - 1]; + if (ret instanceof FrozenLayerWithBackprop) { + ret = ((FrozenLayerWithBackprop) ret).getInsideLayer(); + } + return ret; + } + + + /** + * See {@link #setParams(INDArray)} + */ + public void setParameters(INDArray params) { + setParams(params); + } + + /** + * Intended for internal/developer use + */ + public NeuralNetConfiguration getDefaultConfiguration() { + return defaultConfiguration; + } + + public INDArray getLabels() { + return labels; + } + + /** + * @param labels Labels to set + */ + public void setLabels(INDArray labels) { + this.labels = labels; + } + + public INDArray getInput() { + return input; + } + + /** + * Set the input array for the network + * + * @param input Input array to set + */ + public void setInput(INDArray input) { + this.input = input; + if (this.layers == null) { + init(); + } + if (input != null) { + if (input.length() == 0) { + throw new IllegalArgumentException( + "Invalid input: length 0 (shape: " + Arrays.toString(input.shape()) + ")"); + } + + if (input.size(0) > Integer.MAX_VALUE) { + throw new ND4JArraySizeException(); + } + setInputMiniBatchSize((int) input.size(0)); + } + } + + /** + * Get the number of layers in the network + * + * @return the number of layers in the network + */ + public int getnLayers() { + return layerWiseConfigurations.getConfs().size(); + } + + /** + * @return The layers in the network + */ + public synchronized Layer[] getLayers() { + return layers; + } + + public void setLayers(Layer[] layers) { + this.layers = layers; + } + + public Layer getLayer(int i) { + Preconditions.checkArgument(i >= 0 && i < layers.length, + "Invalid layer index: layer index must be 0" + + " to %s (inclusive), got index %s", layers.length - 1, i); + return layers[i]; + } + + public Layer getLayer(String name) { + return layerMap.get(name); + } + + public List getLayerNames() { + return new ArrayList<>(layerMap.keySet()); + } + + public INDArray getMask() { + return mask; + } + + public void setMask(INDArray mask) { + this.mask = mask; + } + + public INDArray getMaskArray() { + return mask; + } + + @Override + public void setMaskArray(INDArray maskArray) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isPretrainLayer() { + return false; + } + + @Override + public void clearNoiseWeightParams() { + for (Layer l : layers) { + l.clearNoiseWeightParams(); + } + } + + @Override + public void allowInputModification(boolean allow) { + throw new UnsupportedOperationException("Not supported"); + } + + //========== + //Layer methods + + @Override + public Pair feedForwardMaskArray(INDArray maskArray, + MaskState currentMaskState, + int minibatchSize) { + if (maskArray == null) { + for (int i = 0; i < layers.length; i++) { + layers[i].feedForwardMaskArray(null, null, minibatchSize); + } + } else { + //Do a forward pass through each preprocessor and layer + for (int i = 0; i < layers.length; i++) { + InputPreProcessor preProcessor = 
getLayerWiseConfigurations().getInputPreProcess(i); + + if (preProcessor != null) { + Pair p = + preProcessor.feedForwardMaskArray(maskArray, currentMaskState, minibatchSize); + if (p != null) { + maskArray = p.getFirst(); + currentMaskState = p.getSecond(); + } else { + maskArray = null; + currentMaskState = null; + } + } + + Pair p = + layers[i].feedForwardMaskArray(maskArray, currentMaskState, minibatchSize); + if (p != null) { + maskArray = p.getFirst(); + currentMaskState = p.getSecond(); + } else { + maskArray = null; + currentMaskState = null; + } + } + } + + return new Pair<>(maskArray, currentMaskState); + } + + @Override + public LayerHelper getHelper() { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public Type type() { + return Type.MULTILAYER; + } + + /** + * Equivalent to {@link #output(INDArray)} using the input set via {@link #setInput(INDArray)} + */ + public INDArray activate(TrainingMode training) { + return output(input, training == TrainingMode.TRAIN); + } + + /** + * Equivalent to {@link #output(INDArray, TrainingMode)} + */ + public INDArray activate(INDArray input, TrainingMode training) { + return output(input, training == TrainingMode.TRAIN); + } + + @Override + public Pair backpropGradient(INDArray epsilon, + LayerWorkspaceMgr workspaceMgr) { + if (getOutputLayer() instanceof IOutputLayer) { + throw new UnsupportedOperationException( + "Cannot calculate gradients based on epsilon with OutputLayer"); + } + + return calcBackpropGradients(epsilon, false, false, true); + } + + @Override + public int getIndex() { + return layerIndex; + } + + @Override + public void setIndex(int index) { + layerIndex = index; + } + + @Override + public int getIterationCount() { + return getLayerWiseConfigurations().getIterationCount(); + } + + @Override + public void setIterationCount(int iterationCount) { + getLayerWiseConfigurations().setIterationCount(iterationCount); + } + + @Override + public int getEpochCount() { + return getLayerWiseConfigurations().getEpochCount(); + } + + @Override + public void setEpochCount(int epochCount) { + getLayerWiseConfigurations().setEpochCount(epochCount); + } + + @Override + public double calcRegularizationScore(boolean backpropParamsOnly) { + double scoreSum = 0.0; + for (int i = 0; i < layers.length; i++) { + scoreSum += layers[i].calcRegularizationScore(backpropParamsOnly); + } + return scoreSum; + } + + @Override + public void update(Gradient gradient) { + if (gradient.gradient().length() != numParams(true)) { + throw new IllegalArgumentException( + "Invalid input: expect gradients array of length " + numParams(true)); + } + for (Map.Entry entry : gradient.gradientForVariable().entrySet()) { + String key = entry.getKey(); + INDArray val = entry.getValue(); + int idx = key.indexOf('_'); + if (idx == -1) { + throw new IllegalStateException( + "Invalid param key: not have layer separator: \"" + key + "\""); + } + Integer layerId = Integer.parseInt(key.substring(0, idx)); + String paramType = key.substring(idx + 1); + // Update MLN gradient + this.gradient.gradientForVariable().put(key, val); + // Update layer params + layers[layerId].update(val, paramType); + } + // Update layerwise gradient view + setBackpropGradientsViewArray(gradient.gradient()); + + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr mgr) { + throw new UnsupportedOperationException(); + } + + @Override + public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr mgr) { + throw new 
UnsupportedOperationException(); + } + + @Override + public int getInputMiniBatchSize() { + if (!conf().isMiniBatch()) { + return 1; + } + + if (input.size(0) > Integer.MAX_VALUE) { + throw new ND4JArraySizeException(); + } + return (int) input.size(0); + } + + @Override + public void setInputMiniBatchSize(int size) { + if (layers != null) { + for (Layer l : layers) { + l.setInputMiniBatchSize(size); + } + } + } + + /** + * If this MultiLayerNetwork contains one or more RNN layers: conduct forward pass (prediction) + * but using previous stored state for any RNN layers. The activations for the final step are also + * stored in the RNN layers for use next time rnnTimeStep() is called.
This method can be used + * to generate output one or more steps at a time instead of always having to do a forward pass from + * t=0. Example uses are for streaming data, and for generating samples from network output one + * step at a time (where samples are then fed back into the network as input).
If no previous + * state is present in RNN layers (i.e., initially or after calling rnnClearPreviousState()), the + * default initialization (usually 0) is used.
Supports mini-batch (i.e., multiple + * predictions/forward pass in parallel) as well as for single examples.
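+ * A minimal usage sketch (illustrative only; {@code net}, {@code firstStepInput} and the
+ * {@code sampleNextInput} helper are assumed here and are not part of this class):
+ * <pre>{@code
+ * net.rnnClearPreviousState();
+ * INDArray step = firstStepInput;              // shape [miniBatchSize, nIn, 1]
+ * for (int t = 0; t < 100; t++) {              // generate 100 further steps
+ *     INDArray out = net.rnnTimeStep(step);    // uses and updates the stored RNN state
+ *     step = sampleNextInput(out);             // hypothetical helper: sample the next input from the output
+ * }
+ * }</pre>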
+ * + * @param input Input to network. May be for one or multiple time steps. For single time step: + * input has shape [miniBatchSize,inputSize] or [miniBatchSize,inputSize,1]. + * miniBatchSize=1 for single example.
For multiple time steps: + * [miniBatchSize,inputSize,inputTimeSeriesLength] + * @return Output activations. If output is RNN layer (such as RnnOutputLayer): if input has shape + * [miniBatchSize,inputSize] i.e., is 2d, output has shape [miniBatchSize,outputSize] (i.e., also + * 2d).
Otherwise output is 3d [miniBatchSize,outputSize,inputTimeSeriesLength] when using + * RnnOutputLayer. + * @see #rnnTimeStep(INDArray, MemoryWorkspace) For outputting the activations in the specified + * workspace + */ + public INDArray rnnTimeStep(INDArray input) { + return rnnTimeStep(input, null); + } + + /** + * See {@link #rnnTimeStep(INDArray)} for details
If no memory workspace is provided, the + * output will be detached (not in any workspace).
If a memory workspace is provided, the + * output activation array (i.e., the INDArray returned by this method) will be placed in the + * specified workspace. This workspace must be opened by the user before calling this method - and + * the user is responsible for (a) closing this workspace, and (b) ensuring the output array is + * not used out of scope (i.e., not used after closing the workspace to which it belongs - as this + * is likely to cause either an exception when used, or a crash). + * + * @param input Input activations + * @param outputWorkspace Output workspace. May be null + * @return The output/activations from the network (either detached or in the specified workspace + * if provided) + */ + public INDArray rnnTimeStep(INDArray input, MemoryWorkspace outputWorkspace) { + try { + boolean inputIs2d = input.rank() == 2; + INDArray out = outputOfLayerDetached(false, FwdPassType.RNN_TIMESTEP, layers.length - 1, + input, null, null, outputWorkspace); + if (inputIs2d && out.rank() == 3 && layers[layers.length - 1].type() == Type.RECURRENT) { + //Return 2d output with shape [miniBatchSize,nOut] + // instead of 3d output with shape [miniBatchSize,nOut,1] + return out.tensorAlongDimension(0, 1, 0); + } + return out; + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; + } + } + + /** + * Get the state of the RNN layer, as used in rnnTimeStep(). + * + * @param layer Number/index of the layer. + * @return Hidden state, or null if layer is not an RNN layer + */ + public Map rnnGetPreviousState(int layer) { + if (layer < 0 || layer >= layers.length) { + throw new IllegalArgumentException("Invalid layer number"); + } + Layer l = layers[layer]; + if (l instanceof org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) { + l = ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) l).getUnderlying(); + } + if (!(l instanceof RecurrentLayer)) { + throw new IllegalArgumentException("Layer is not an RNN layer"); + } + return ((RecurrentLayer) l).rnnGetPreviousState(); + } + + /** + * Set the state of the RNN layer. + * + * @param layer The number/index of the layer. + * @param state The state to set the specified layer to + */ + public void rnnSetPreviousState(int layer, Map state) { + if (layer < 0 || layer >= layers.length) { + throw new IllegalArgumentException("Invalid layer number"); + } + Layer l = layers[layer]; + if (l instanceof org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) { + l = ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) l).getUnderlying(); + } + if (!(l instanceof RecurrentLayer)) { + throw new IllegalArgumentException("Layer is not an RNN layer"); + } + RecurrentLayer r = (RecurrentLayer) l; + r.rnnSetPreviousState(state); + } + + /** + * Clear the previous state of the RNN layers (if any). + */ + public void rnnClearPreviousState() { + if (layers == null) { + return; + } + for (int i = 0; i < layers.length; i++) { + if (layers[i] instanceof RecurrentLayer) { + ((RecurrentLayer) layers[i]).rnnClearPreviousState(); + } else if (layers[i] instanceof MultiLayerNetwork) { + ((MultiLayerNetwork) layers[i]).rnnClearPreviousState(); + } else if (layers[i] instanceof BaseWrapperLayer + && ((BaseWrapperLayer) layers[i]).getUnderlying() instanceof RecurrentLayer) { + ((RecurrentLayer) ((BaseWrapperLayer) layers[i]).getUnderlying()).rnnClearPreviousState(); + } + } + } + + /** + * Similar to rnnTimeStep and feedForward() methods. Difference here is that this method:
(a) + * like rnnTimeStep, does a forward pass using stored state for RNN layers, and
(b) unlike + * rnnTimeStep, does not modify the RNN layer state.
Therefore multiple calls to this method + * with the same input should have the same output.
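+ * A minimal sketch of such a call ({@code net} and {@code features} are assumed to exist; the
+ * last flag is set as it would be for truncated BPTT training):
+ * <pre>{@code
+ * List<INDArray> acts = net.rnnActivateUsingStoredState(features, true, true);
+ * INDArray outputLayerActivations = acts.get(acts.size() - 1);   // index 0 holds the input itself
+ * }</pre>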
Typically used during training only. Use + * rnnTimeStep for prediction/forward pass at test time. + * + * @param input Input to network + * @param training Whether training or not + * @param storeLastForTBPTT set to true if used as part of truncated BPTT training + * @return Activations for each layer (including input, as per feedforward() etc) + */ + public List rnnActivateUsingStoredState(INDArray input, boolean training, + boolean storeLastForTBPTT) { + return ffToLayerActivationsDetached(training, FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE, + storeLastForTBPTT, layers.length - 1, input, mask, null, false); + } + + /** + * Get the updater for this MultiLayerNetwork + * + * @return Updater for MultiLayerNetwork + */ + public Updater getUpdater() { + return getUpdater(true); + } + + /** + * Set the updater for the MultiLayerNetwork + */ + public void setUpdater(Updater updater) { + if (solver == null) { + solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build(); + } + solver.getOptimizer().setUpdater(updater); + } + + public Updater getUpdater(boolean initializeIfReq) { + if (solver == null && initializeIfReq) { + synchronized (this) { + if (solver == null) { //May have been created while waiting for lock + solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + .build(); + solver.getOptimizer().setUpdater(UpdaterCreator.getUpdater(this)); + } + } + } + if (solver != null) { + return solver.getOptimizer().getUpdater(initializeIfReq); + } + return null; + } + + /** + * Set the mask arrays for features and labels. Mask arrays are typically used in situations such + * as one-to-many and many-to-one learning with recurrent neural networks, as well as for + * supporting time series of varying lengths within the same minibatch.
For example, with RNN + * data sets with input of shape [miniBatchSize,nIn,timeSeriesLength] and outputs of shape + * [miniBatchSize,nOut,timeSeriesLength], the features and labels mask arrays will both have shape + * [miniBatchSize,timeSeriesLength] and contain values 0 or 1 at each element (to specify whether + * a given input/example is present - or merely padding - at a given time step).
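+ * A small sketch of building such a mask by hand (the sizes and the {@code net} variable are
+ * illustrative assumptions only):
+ * <pre>{@code
+ * // Three sequences of lengths 5, 3 and 2, padded to a common length of 5
+ * int[] lengths = {5, 3, 2};
+ * INDArray featuresMask = Nd4j.zeros(3, 5);
+ * for (int i = 0; i < lengths.length; i++) {
+ *     for (int t = 0; t < lengths[i]; t++) {
+ *         featuresMask.putScalar(i, t, 1.0);
+ *     }
+ * }
+ * net.setLayerMaskArrays(featuresMask, featuresMask);   // here the same mask is reused for the labels
+ * }</pre>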
+ * NOTE: This method is not usually used directly. Instead, methods such as + * {@link #feedForward(INDArray, INDArray, INDArray)} + * and {@link #output(INDArray, boolean, INDArray, INDArray)} handle setting of masking + * internally. + * + * @param featuresMaskArray Mask array for features (input) + * @param labelsMaskArray Mask array for labels (output) + * @see #clearLayerMaskArrays() + */ + public void setLayerMaskArrays(INDArray featuresMaskArray, INDArray labelsMaskArray) { + if (featuresMaskArray != null) { + + if (featuresMaskArray.size(0) > Integer.MAX_VALUE) { + throw new ND4JArraySizeException(); + } + //New approach: use feedForwardMaskArray method + feedForwardMaskArray(featuresMaskArray, MaskState.Active, (int) featuresMaskArray.size(0)); /* @@ -3308,837 +3629,883 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, org.d // non-zero (i.e., activationFunction(0*weights + bias) != 0 in general) //This assumes that the time series input is masked - i.e., values are 0 at the padded time steps, // so we don't need to do anything for the recurrent layer - + //Now, if mask array is 2d -> need to reshape to 1d (column vector) in the exact same order // as is done for 3d -> 2d time series reshaping INDArray reshapedFeaturesMask = TimeSeriesUtils.reshapeTimeSeriesMaskToVector(featuresMaskArray); - + for( int i=0; i See {@link #setLayerMaskArrays(INDArray, INDArray)} + * for details on mask arrays. + */ + public void clearLayerMaskArrays() { + for (Layer layer : layers) { + layer.setMaskArray(null); + } + } + + /** + * Evaluate the network (classification performance) + * + * @param iterator Iterator to evaluate on + * @return Evaluation object; results of evaluation on all examples in the data set + */ + public T evaluate(@NonNull DataSetIterator iterator) { + return (T) evaluate(iterator, null); + } + + /** + * Evaluate the network (classification performance). Can only be used with MultiDataSetIterator + * instances with a single input/output array + * + * @param iterator Iterator to evaluate on + * @return Evaluation object; results of evaluation on all examples in the data set + */ + public Evaluation evaluate(@NonNull MultiDataSetIterator iterator) { + return evaluate(new MultiDataSetWrapperIterator(iterator)); + } + + /** + * Evaluate the network for regression performance + * + * @param iterator Data to evaluate on + * @return Regression evaluation + */ + public T evaluateRegression(DataSetIterator iterator) { + return (T) doEvaluation(iterator, new RegressionEvaluation(iterator.totalOutcomes()))[0]; + } + + /** + * Evaluate the network for regression performance Can only be used with MultiDataSetIterator + * instances with a single input/output array + * + * @param iterator Data to evaluate on + */ + public org.nd4j.evaluation.regression.RegressionEvaluation evaluateRegression( + MultiDataSetIterator iterator) { + return evaluateRegression(new MultiDataSetWrapperIterator(iterator)); + } + + /** + * @deprecated To be removed - use {@link #evaluateROC(DataSetIterator, int)} to enforce selection + * of appropriate ROC/threshold configuration + */ + @Deprecated + public T evaluateROC(DataSetIterator iterator) { + return evaluateROC(iterator, 0); + } + + /** + * Evaluate the network (must be a binary classifier) on the specified data, using the {@link ROC} + * class + * + * @param iterator Data to evaluate on + * @param rocThresholdSteps Number of threshold steps to use with {@link ROC} - see that class for + * details. 
+ * @return ROC evaluation on the given dataset + */ + public T evaluateROC(DataSetIterator iterator, int rocThresholdSteps) { + Layer outputLayer = getOutputLayer(); + if (getLayerWiseConfigurations().isValidateOutputLayerConfig()) { + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), + ROC.class); + } + return (T) doEvaluation(iterator, new org.deeplearning4j.eval.ROC(rocThresholdSteps))[0]; + } + + /** + * @deprecated To be removed - use {@link #evaluateROCMultiClass(DataSetIterator, int)} to enforce + * selection of appropriate ROC/threshold configuration + */ + @Deprecated + public T evaluateROCMultiClass(DataSetIterator iterator) { + return evaluateROCMultiClass(iterator, 0); + } + + /** + * Evaluate the network on the specified data, using the {@link ROCMultiClass} class + * + * @param iterator Data to evaluate on + * @param rocThresholdSteps Number of threshold steps to use with {@link ROCMultiClass} + * @return Multi-class ROC evaluation on the given dataset + */ + public T evaluateROCMultiClass(DataSetIterator iterator, + int rocThresholdSteps) { + Layer outputLayer = getOutputLayer(); + if (getLayerWiseConfigurations().isValidateOutputLayerConfig()) { + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), + ROCMultiClass.class); + } + return (T) doEvaluation(iterator, + new org.deeplearning4j.eval.ROCMultiClass(rocThresholdSteps))[0]; + } + + /** + * Perform evaluation using an arbitrary IEvaluation instance. + * + * @param iterator data to evaluate on + */ + public T[] doEvaluation(DataSetIterator iterator, T... evaluations) { + try { + return doEvaluationHelper(iterator, evaluations); + } catch (OutOfMemoryError e) { + CrashReportingUtil.writeMemoryCrashDump(this, e); + throw e; + } + } + + public T[] doEvaluationHelper(DataSetIterator iterator, + T... evaluations) { + if (!iterator.hasNext() && iterator.resetSupported()) { + iterator.reset(); } - /** Remove the mask arrays from all layers.
- * See {@link #setLayerMaskArrays(INDArray, INDArray)} for details on mask arrays. - */ - public void clearLayerMaskArrays() { - for (Layer layer : layers) { - layer.setMaskArray(null); - } + DataSetIterator iter = + iterator.asyncSupported() ? new AsyncDataSetIterator(iterator, 2, true) : iterator; + + WorkspaceMode cMode = layerWiseConfigurations.getTrainingWorkspaceMode(); + layerWiseConfigurations.setTrainingWorkspaceMode( + layerWiseConfigurations.getInferenceWorkspaceMode()); + + //First: let's determine if we should do 'split feed forward' for long time series + //The idea: RNN 20k time steps. Train using TBPTT length 100 -> 200 segments of length 100. If we naively + // just use .output(INDArray) here, then our memory requirements are 200x larger than if we did the same + // evaluation in segments... + //Only do this if TBPTT is enabled - if not, it means we can train without TBPTT and hence should be able + // to test without splitting also + boolean useRnnSegments = (layerWiseConfigurations.getBackpropType() + == BackpropType.TruncatedBPTT); + + MemoryWorkspace outputWs; + if (getLayerWiseConfigurations().getInferenceWorkspaceMode() == WorkspaceMode.ENABLED) { + outputWs = Nd4j.getWorkspaceManager() + .getWorkspaceForCurrentThread(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM); + } else { + outputWs = new DummyWorkspace(); } - /** - * Evaluate the network (classification performance) - * - * @param iterator Iterator to evaluate on - * @return Evaluation object; results of evaluation on all examples in the data set - */ - public T evaluate(@NonNull DataSetIterator iterator) { - return (T)evaluate(iterator, null); - } + while (iter.hasNext()) { + DataSet next = iter.next(); - /** - * Evaluate the network (classification performance). - * Can only be used with MultiDataSetIterator instances with a single input/output array - * - * @param iterator Iterator to evaluate on - * @return Evaluation object; results of evaluation on all examples in the data set - */ - public Evaluation evaluate(@NonNull MultiDataSetIterator iterator) { - return evaluate(new MultiDataSetWrapperIterator(iterator)); - } + if (next.getFeatures() == null || next.getLabels() == null) { + continue; + } - /** - * Evaluate the network for regression performance - * @param iterator Data to evaluate on - * @return Regression evaluation - */ - public T evaluateRegression(DataSetIterator iterator) { - return (T)doEvaluation(iterator, new RegressionEvaluation(iterator.totalOutcomes()))[0]; - } + INDArray features = next.getFeatures(); + INDArray labels = next.getLabels(); + INDArray fMask = next.getFeaturesMaskArray(); + INDArray lMask = next.getLabelsMaskArray(); + List meta = next.getExampleMetaData(); - /** - * Evaluate the network for regression performance - * Can only be used with MultiDataSetIterator instances with a single input/output array - * @param iterator Data to evaluate on - */ - public org.nd4j.evaluation.regression.RegressionEvaluation evaluateRegression(MultiDataSetIterator iterator) { - return evaluateRegression(new MultiDataSetWrapperIterator(iterator)); - } + if (!useRnnSegments) { + //Standard/non-RNN case: + try (MemoryWorkspace ws = outputWs.notifyScopeEntered()) { + INDArray out = outputOfLayerDetached(false, FwdPassType.STANDARD, layers.length - 1, + features, fMask, lMask, ws); - /** - * @deprecated To be removed - use {@link #evaluateROC(DataSetIterator, int)} to enforce selection of appropriate ROC/threshold configuration - */ - @Deprecated - public T evaluateROC(DataSetIterator iterator){ - 
return evaluateROC(iterator, 0); - } - - /** - * Evaluate the network (must be a binary classifier) on the specified data, using the {@link ROC} class - * - * @param iterator Data to evaluate on - * @param rocThresholdSteps Number of threshold steps to use with {@link ROC} - see that class for details. - * @return ROC evaluation on the given dataset - */ - public T evaluateROC(DataSetIterator iterator, int rocThresholdSteps) { - Layer outputLayer = getOutputLayer(); - if(getLayerWiseConfigurations().isValidateOutputLayerConfig()){ - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), ROC.class); - } - return (T)doEvaluation(iterator, new org.deeplearning4j.eval.ROC(rocThresholdSteps))[0]; - } - - /** - * @deprecated To be removed - use {@link #evaluateROCMultiClass(DataSetIterator, int)} to enforce selection of appropriate ROC/threshold configuration - */ - @Deprecated - public T evaluateROCMultiClass(DataSetIterator iterator) { - return evaluateROCMultiClass(iterator, 0); - } - - /** - * Evaluate the network on the specified data, using the {@link ROCMultiClass} class - * - * @param iterator Data to evaluate on - * @param rocThresholdSteps Number of threshold steps to use with {@link ROCMultiClass} - * @return Multi-class ROC evaluation on the given dataset - */ - public T evaluateROCMultiClass(DataSetIterator iterator, int rocThresholdSteps) { - Layer outputLayer = getOutputLayer(); - if(getLayerWiseConfigurations().isValidateOutputLayerConfig()){ - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), ROCMultiClass.class); - } - return (T)doEvaluation(iterator, new org.deeplearning4j.eval.ROCMultiClass(rocThresholdSteps))[0]; - } - - /** - * Perform evaluation using an arbitrary IEvaluation instance. - * - * @param iterator data to evaluate on - */ - public T[] doEvaluation(DataSetIterator iterator, T... evaluations) { - try{ - return doEvaluationHelper(iterator, evaluations); - } catch (OutOfMemoryError e){ - CrashReportingUtil.writeMemoryCrashDump(this, e); - throw e; - } - } - - public T[] doEvaluationHelper(DataSetIterator iterator, T... evaluations) { - if (!iterator.hasNext() && iterator.resetSupported()) { - iterator.reset(); - } - - DataSetIterator iter = iterator.asyncSupported() ? new AsyncDataSetIterator(iterator, 2, true) : iterator; - - WorkspaceMode cMode = layerWiseConfigurations.getTrainingWorkspaceMode(); - layerWiseConfigurations.setTrainingWorkspaceMode(layerWiseConfigurations.getInferenceWorkspaceMode()); - - //First: let's determine if we should do 'split feed forward' for long time series - //The idea: RNN 20k time steps. Train using TBPTT length 100 -> 200 segments of length 100. If we naively - // just use .output(INDArray) here, then our memory requirements are 200x larger than if we did the same - // evaluation in segments... 
- //Only do this if TBPTT is enabled - if not, it means we can train without TBPTT and hence should be able - // to test without splitting also - boolean useRnnSegments = (layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT); - - MemoryWorkspace outputWs; - if(getLayerWiseConfigurations().getInferenceWorkspaceMode() == WorkspaceMode.ENABLED){ - outputWs = Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM); - } else { - outputWs = new DummyWorkspace(); - } - - while (iter.hasNext()) { - DataSet next = iter.next(); - - if (next.getFeatures() == null || next.getLabels() == null) - continue; - - - INDArray features = next.getFeatures(); - INDArray labels = next.getLabels(); - INDArray fMask = next.getFeaturesMaskArray(); - INDArray lMask = next.getLabelsMaskArray(); - List meta = next.getExampleMetaData(); - - - if (!useRnnSegments) { - //Standard/non-RNN case: - try (MemoryWorkspace ws = outputWs.notifyScopeEntered()) { - INDArray out = outputOfLayerDetached(false, FwdPassType.STANDARD, layers.length - 1, features, fMask, lMask, ws); - - try (MemoryWorkspace wsO = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - for (T evaluation : evaluations) - evaluation.eval(labels, out, lMask, meta); - } - } - } else { - rnnClearPreviousState(); - - - //Get subset of features and labels: - val fwdLen = layerWiseConfigurations.getTbpttFwdLength(); - val tsLength = features.size(2); - long nSubsets = tsLength / fwdLen; - if (tsLength % fwdLen != 0) - nSubsets++; //Example: 100 fwdLen with timeSeriesLength=120 -> want 2 subsets (1 of size 100, 1 of size 20) - for (int i = 0; i < nSubsets; i++) { - val startTimeIdx = i * fwdLen; - val endTimeIdx = Math.min(startTimeIdx + fwdLen, tsLength); - - if (endTimeIdx > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - INDArray[] subsets = getSubsetsForTbptt(startTimeIdx, (int) endTimeIdx, features, labels, fMask, lMask); - - setLayerMaskArrays(subsets[2], subsets[3]); - - try (MemoryWorkspace ws = outputWs.notifyScopeEntered()) { - INDArray outSub = rnnTimeStep(subsets[0], ws); - try (MemoryWorkspace wsO = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - for (T evaluation : evaluations) - evaluation.eval(subsets[1], outSub, subsets[3]); - } - } - } + try (MemoryWorkspace wsO = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + for (T evaluation : evaluations) { + evaluation.eval(labels, out, lMask, meta); } - - //Clear inputs, masks etc. Important to avoid leaking invalidated/out of scope arrays between iterations - clearLayersStates(); + } } + } else { + rnnClearPreviousState(); - if (iterator.asyncSupported()) - ((AsyncDataSetIterator) iter).shutdown(); - - layerWiseConfigurations.setTrainingWorkspaceMode(cMode); - - return evaluations; - } - - /** - * Evaluate the network on the provided data set. Used for evaluating the performance of classifiers - * - * @param iterator Data to undertake evaluation on - * @return Evaluation object, summarizing the results of the evaluation on the provided DataSetIterator - */ - public Evaluation evaluate(DataSetIterator iterator, List labelsList) { - return evaluate(iterator, labelsList, 1); - } - - @Override - public INDArray updaterState() { - return getUpdater() != null ? 
getUpdater().getStateViewArray() : null; - } - - @Override - public void fit(MultiDataSet dataSet) { - if (dataSet.getFeatures().length == 1 && dataSet.getLabels().length == 1) { - INDArray features = dataSet.getFeatures(0); - INDArray labels = dataSet.getLabels(0); - INDArray fMask = null; - INDArray lMask = null; - - if (dataSet.getFeaturesMaskArrays() != null) - fMask = dataSet.getFeaturesMaskArrays()[0]; - - if (dataSet.getFeaturesMaskArrays() != null) - lMask = dataSet.getLabelsMaskArrays()[0]; - - DataSet ds = new DataSet(features, labels, fMask, lMask); - fit(ds); - } else { - throw new DL4JInvalidInputException( - "MultiLayerNetwork can't handle MultiDataSet with more than 1 features or labels array." + - "Please consider use of ComputationGraph"); + //Get subset of features and labels: + val fwdLen = layerWiseConfigurations.getTbpttFwdLength(); + val tsLength = features.size(2); + long nSubsets = tsLength / fwdLen; + if (tsLength % fwdLen != 0) { + nSubsets++; //Example: 100 fwdLen with timeSeriesLength=120 -> want 2 subsets (1 of size 100, 1 of size 20) } - } + for (int i = 0; i < nSubsets; i++) { + val startTimeIdx = i * fwdLen; + val endTimeIdx = Math.min(startTimeIdx + fwdLen, tsLength); - /** - * Perform minibatch training on all minibatches in the MultiDataSetIterator, for the specified number of epochs. - * Equvalent to calling {@link #fit(MultiDataSetIterator)} numEpochs times in a loop - * - * @param iterator Training data (DataSetIterator). Iterator must support resetting - * @param numEpochs Number of training epochs, >= 1 - */ - public void fit(@NonNull MultiDataSetIterator iterator, int numEpochs){ - Preconditions.checkArgument(numEpochs > 0, "Number of epochs much be > 0. Got numEpochs = %s", numEpochs); - Preconditions.checkArgument(numEpochs == 1 || iterator.resetSupported(), "Cannot perform multiple epochs training using" + - "iterator has does not support resetting (iterator.resetSupported() returned false)"); - - for(int i = 0; i < numEpochs; i++) { - fit(iterator); - } - } - - /** - * Perform minibatch training on all minibatches in the MultiDataSetIterator.
- * Note: The MultiDataSets in the MultiDataSetIterator must have exactly 1 input and output array (as - * MultiLayerNetwork only supports 1 input and 1 output) - * - * @param iterator Training data (DataSetIterator). Iterator must support resetting - */ - @Override - public void fit(MultiDataSetIterator iterator) { - fit(new MultiDataSetWrapperIterator(iterator)); - } - - @Override - public T[] doEvaluation(MultiDataSetIterator iterator, T[] evaluations) { - return doEvaluation(new MultiDataSetWrapperIterator(iterator), evaluations); - } - - /** - * Evaluate the network (for classification) on the provided data set, with top N accuracy in addition to standard accuracy. - * For 'standard' accuracy evaluation only, use topN = 1 - * - * @param iterator Iterator (data) to evaluate on - * @param labelsList List of labels. May be null. - * @param topN N value for top N accuracy evaluation - * @return Evaluation object, summarizing the results of the evaluation on the provided DataSetIterator - */ - public Evaluation evaluate(DataSetIterator iterator, List labelsList, int topN) { - if (layers == null || !(getOutputLayer() instanceof IOutputLayer)) { - throw new IllegalStateException("Cannot evaluate network with no output layer"); - } - if (labelsList == null) { - try { - labelsList = iterator.getLabels(); - } catch (Throwable t){ } //Ignore, maybe UnsupportedOperationException etc - } - - Layer outputLayer = getOutputLayer(); - if(getLayerWiseConfigurations().isValidateOutputLayerConfig()){ - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), Evaluation.class); - } - - Evaluation e = new org.deeplearning4j.eval.Evaluation(labelsList, topN); - doEvaluation(iterator, e); - - return e; - } - - protected void update(Task task) { - if (!initDone) { - initDone = true; - Heartbeat heartbeat = Heartbeat.getInstance(); - task = ModelSerializer.taskByModel(this); - Environment env = EnvironmentUtils.buildEnvironment(); - heartbeat.reportEvent(Event.STANDALONE, env, task); - } - } - - /** - * String detailing the architecture of the multilayernetwork. - * Columns are LayerIndex with layer type, nIn, nOut, Total number of parameters and the Shapes of the parameters - * Will also give information about frozen layers, if any. - * @return Summary as a string - * @see #memoryInfo(int, InputType) - */ - public String summary() { - return summary(null); - } - - /** - * String detailing the architecture of the multilayernetwork. - * Will also display activation size when given an input type. - * Columns are LayerIndex with layer type, nIn, nOut, Total number of parameters, Shapes of the parameters, Input activation shape, Output activation shape - * Will also give information about frozen layers, if any. - * @return Summary as a string - * @see #memoryInfo(int, InputType) - */ - public String summary(InputType inputType) { - StringBuilder ret = new StringBuilder(); - ret.append("\n"); - - List lines = new ArrayList<>(); - if(inputType == null){ - lines.add(new String[]{"LayerName (LayerType)", "nIn,nOut", "TotalParams", "ParamsShape"}); - } else { - lines.add(new String[]{"LayerName (LayerType)", "nIn,nOut", "TotalParams", "ParamsShape", "InputShape", "OutputShape"}); - } - int[] maxLength = new int[inputType == null ? 
4 : 6]; - String[] header = lines.get(0); - for( int i=0; i 0) { - paramShape = ""; - if (currentLayer instanceof BidirectionalLayer) { // Bidirectional layer is not an FFL - BidirectionalLayer bi = (BidirectionalLayer) currentLayer; - in = String.valueOf(((Bidirectional)bi.conf().getLayer()).getNIn()); - out = String.valueOf(((Bidirectional)bi.conf().getLayer()).getNOut()); - } else { - try { - in = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNIn()); - out = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNOut()); - } - catch (Exception e) { // Some layers, like PReLU, are just BaseLayers (but have parameters) - } - } - Set paraNames = currentLayer.paramTable().keySet(); - for (String aP : paraNames) { - String paramS = ArrayUtils.toString(currentLayer.paramTable().get(aP).shape()); - paramShape += aP + ":" + paramS + ", "; - } - paramShape = paramShape.subSequence(0, paramShape.lastIndexOf(",")).toString(); - } - if (currentLayer instanceof FrozenLayer) { - frozenParams += currentLayer.numParams(); - classNameArr = ((FrozenLayer) currentLayer).getInsideLayer().getClass().getName().split("\\."); - className = "Frozen " + classNameArr[classNameArr.length - 1]; - } - - String[] line; - if (inputType == null) { - line = new String[]{name + " (" + className + ")", in + "," + out, paramCount, paramShape}; - } else { - line = new String[]{name + " (" + className + ")", in + "," + out, paramCount,paramShape,inShape,outShape}; - } - for( int i=0; iautomatically when using iterator-based fitting methods, such as - * {@link #fit(DataSetIterator)}. However, when using non-iterator fit methods (DataSet, INDArray/INDArray etc), - * the network has no way to know when one epoch ends and another starts. In such situations, this method - * can be used to increment the epoch counter.
- * Note that the epoch counter is used for situations such as some learning rate schedules, and the like. - * - * The current epoch count can be obtained using {@code MultiLayerConfiguration.getLayerwiseConfiguration().getEpochCount()} - */ - public void incrementEpochCount(){ - layerWiseConfigurations.setEpochCount(layerWiseConfigurations.getEpochCount() + 1); - synchronizeIterEpochCounts(); - } - - - protected void synchronizeIterEpochCounts() { - //TODO: this is necessary for some schedules - but the redundant values are a little ugly... - int currIter = getIterationCount(); - int currEpoch = getEpochCount(); - for(Layer l : layers) { - l.setIterationCount(currIter); - l.setEpochCount(currEpoch); - } - } - - /** - * Save the MultiLayerNetwork to a file. Restore using {@link #load(File, boolean)}. - * Note that this saves the updater (i.e., the state array for momentum/Adam/rmsprop etc), which is desirable - * if further training will be undertaken. - * - * @param f File to save the network to - * @see ModelSerializer ModelSerializer for more details (and saving/loading via streams) - * @see #save(File, boolean) - */ - public void save( File f ) throws IOException { - save(f, true); - } - - /** - * Save the MultiLayerNetwork to a file. Restore using {@link #load(File, boolean)}. - * - * @param f File to save the network to - * @param saveUpdater If true: save the updater (i.e., the state array for momentum/Adam/rmsprop etc), which should - * usually be saved if further training is required - * @see ModelSerializer ModelSerializer for more details (and saving/loading via streams) - * @see #save(File, boolean) - */ - public void save(File f, boolean saveUpdater) throws IOException{ - ModelSerializer.writeModel(this, f, saveUpdater); - } - - /** - * Restore a MultiLayerNetwork to a file, saved using {@link #save(File)} or {@link ModelSerializer} - * @param f File to load the network from - * @param loadUpdater If true: load the updater if it is available (i.e., the state array for momentum/Adam/rmsprop - * etc) - use false if no further training is required, or true if further training - * will be undertaken - * @see ModelSerializer ModelSerializer for more details (and saving/loading via streams) - */ - public static MultiLayerNetwork load(File f, boolean loadUpdater) throws IOException { - return ModelSerializer.restoreMultiLayerNetwork(f, loadUpdater); - } - - /** - * Convert this MultiLayerNetwork to a ComputationGraph - * - * @return ComputationGraph equivalent to this network (including parameters and updater state) - */ - public ComputationGraph toComputationGraph(){ - return NetworkUtils.toComputationGraph(this); - } - - /** - * Return a copy of the network with the parameters and activations set to use the specified (floating point) data type. - * If the existing datatype is the same as the requested dataype, the original network will be returned unchanged. - * Only floating point datatypes (DOUBLE, FLOAT, HALF) may be used. - * - * @param dataType Datatype to convert the network to - * @return The network, set to use the specified datatype for the parameters and activations - */ - public MultiLayerNetwork convertDataType(@NonNull DataType dataType){ - Preconditions.checkState(dataType.isFPType(), "Invalid DataType: %s. 
Can only convert network to a floating point type", dataType); - if(dataType == params().dataType()){ - return this; - } - - try(MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - INDArray newParams = params().castTo(dataType); - String jsonConfig = getLayerWiseConfigurations().toJson(); - MultiLayerConfiguration newConf = MultiLayerConfiguration.fromJson(jsonConfig); - newConf.setDataType(dataType); - MultiLayerNetwork newNet = new MultiLayerNetwork(newConf); - newNet.init(newParams, false); - - Updater u = getUpdater(false); - if(u != null && u.getStateViewArray() != null){ - INDArray oldUpdaterState = u.getStateViewArray(); - newNet.getUpdater(true).getStateViewArray().assign(oldUpdaterState); - } - return newNet; - } - } - - /** - * Set the learning rate for all layers in the network to the specified value. Note that if any learning rate - * schedules are currently present, these will be removed in favor of the new (fixed) learning rate.
- *
- * Note: This method not free from a performance point of view: a proper learning rate schedule - * should be used in preference to calling this method at every iteration. - * - * @param newLr New learning rate for all layers - * @see #setLearningRate(ISchedule) - * @see #setLearningRate(int, double) - */ - public void setLearningRate(double newLr){ - NetworkUtils.setLearningRate(this, newLr); - } - - /** - * Set the learning rate schedule for all layers in the network to the specified schedule. - * This schedule will replace any/all existing schedules, and also any fixed learning rate values.
- * Note that the iteration/epoch counts will not be reset. Use {@link MultiLayerConfiguration#setIterationCount(int)} - * and {@link MultiLayerConfiguration#setEpochCount(int)} if this is required - * - * @param newLr New learning rate schedule for all layers - * @see #setLearningRate(ISchedule) - * @see #setLearningRate(int, double) - */ - public void setLearningRate(ISchedule newLr){ - NetworkUtils.setLearningRate(this, newLr); - } - - /** - * Set the learning rate for a single layer in the network to the specified value. Note that if any learning rate - * schedules are currently present, these will be removed in favor of the new (fixed) learning rate.
- *
- * Note: This method not free from a performance point of view: a proper learning rate schedule - * should be used in preference to calling this method at every iteration. Note also that - * {@link #setLearningRate(double)} should also be used in preference, when all layers need to be set to a new LR - * - * @param layerNumber Number of the layer to set the LR for - * @param newLr New learning rate for a single layer - * @see #setLearningRate(ISchedule) - * @see #setLearningRate(int, double) - */ - public void setLearningRate(int layerNumber, double newLr){ - NetworkUtils.setLearningRate(this, layerNumber, newLr); - } - - /** - * Set the learning rate schedule for a single layer in the network to the specified value.
- * Note also that {@link #setLearningRate(ISchedule)} should also be used in preference, when all layers need - * to be set to a new LR schedule.
- * This schedule will replace any/all existing schedules, and also any fixed learning rate values.
- * Note also that the iteration/epoch counts will not be reset. Use {@link MultiLayerConfiguration#setIterationCount(int)} - * and {@link MultiLayerConfiguration#setEpochCount(int)} if this is required - * - * @param layerNumber Number of the layer to set the LR schedule for - * @param newLr New learning rate for a single layer - * @see #setLearningRate(ISchedule) - * @see #setLearningRate(int, double) - */ - public void setLearningRate(int layerNumber, ISchedule newLr){ - NetworkUtils.setLearningRate(this, layerNumber, newLr); - } - - /** - * Get the current learning rate, for the specified layer, from the network. - * Note: If the layer has no learning rate (no parameters, or an updater without a learning rate) then null is returned - * @param layerNumber Layer number to get the learning rate for - * @return Learning rate for the specified layer, or null - */ - public Double getLearningRate(int layerNumber){ - return NetworkUtils.getLearningRate(this, layerNumber); - } - - /** - * Return the layer size (number of units) for the specified layer.
- * Note that the meaning of the "layer size" can depend on the type of layer. For example:
- * - DenseLayer, OutputLayer, recurrent layers: number of units (nOut configuration option)
- * - ConvolutionLayer: the channels (number of channels)
- * - Subsampling layers, global pooling layers, etc: size of 0 is always returned
- * - * @param layer Index of the layer to get the size of. Must be in range 0 to nLayers-1 inclusive - * @return Size of the layer - */ - public int layerSize(int layer) { - if (layer < 0 || layer > layers.length) { - throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and " - + (layers.length - 1) + " inclusive"); - } - org.deeplearning4j.nn.conf.layers.Layer conf = layers[layer].conf().getLayer(); - if (conf == null || !(conf instanceof FeedForwardLayer)) { - return 0; - } - FeedForwardLayer ffl = (FeedForwardLayer) conf; - - if (ffl.getNOut() > Integer.MAX_VALUE) + if (endTimeIdx > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); - return (int) ffl.getNOut(); - } + } + INDArray[] subsets = getSubsetsForTbptt(startTimeIdx, (int) endTimeIdx, features, labels, + fMask, lMask); - /** - * Return the input size (number of inputs) for the specified layer.
- * Note that the meaning of the "input size" can depend on the type of layer. For example:
- * - DenseLayer, OutputLayer, etc: the feature vector size (nIn configuration option)
- * - Recurrent layers: the feature vector size per time step (nIn configuration option)
- * - ConvolutionLayer: the channels (number of channels)
- * - Subsampling layers, global pooling layers, etc: size of 0 is always returned
- * - * @param layer Index of the layer to get the size of. Must be in range 0 to nLayers-1 inclusive - * @return Size of the layer - */ - public int layerInputSize(int layer) { - if (layer < 0 || layer > layers.length) { - throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and " - + (layers.length - 1) + " inclusive"); + setLayerMaskArrays(subsets[2], subsets[3]); + + try (MemoryWorkspace ws = outputWs.notifyScopeEntered()) { + INDArray outSub = rnnTimeStep(subsets[0], ws); + try (MemoryWorkspace wsO = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + for (T evaluation : evaluations) { + evaluation.eval(subsets[1], outSub, subsets[3]); + } + } + } } - org.deeplearning4j.nn.conf.layers.Layer conf = layers[layer].conf().getLayer(); - if (conf == null || !(conf instanceof FeedForwardLayer)) { - return 0; + } + + //Clear inputs, masks etc. Important to avoid leaking invalidated/out of scope arrays between iterations + clearLayersStates(); + } + + if (iterator.asyncSupported()) { + ((AsyncDataSetIterator) iter).shutdown(); + } + + layerWiseConfigurations.setTrainingWorkspaceMode(cMode); + + return evaluations; + } + + /** + * Evaluate the network on the provided data set. Used for evaluating the performance of + * classifiers + * + * @param iterator Data to undertake evaluation on + * @return Evaluation object, summarizing the results of the evaluation on the provided + * DataSetIterator + */ + public Evaluation evaluate(DataSetIterator iterator, List labelsList) { + return evaluate(iterator, labelsList, 1); + } + + @Override + public INDArray updaterState() { + return getUpdater() != null ? getUpdater().getStateViewArray() : null; + } + + @Override + public void fit(MultiDataSet dataSet) { + if (dataSet.getFeatures().length == 1 && dataSet.getLabels().length == 1) { + INDArray features = dataSet.getFeatures(0); + INDArray labels = dataSet.getLabels(0); + INDArray fMask = null; + INDArray lMask = null; + + if (dataSet.getFeaturesMaskArrays() != null) { + fMask = dataSet.getFeaturesMaskArrays()[0]; + } + + if (dataSet.getFeaturesMaskArrays() != null) { + lMask = dataSet.getLabelsMaskArrays()[0]; + } + + DataSet ds = new DataSet(features, labels, fMask, lMask); + fit(ds); + } else { + throw new DL4JInvalidInputException( + "MultiLayerNetwork can't handle MultiDataSet with more than 1 features or labels array." + + "Please consider use of ComputationGraph"); + } + } + + /** + * Perform minibatch training on all minibatches in the MultiDataSetIterator, for the specified + * number of epochs. Equvalent to calling {@link #fit(MultiDataSetIterator)} numEpochs times in a + * loop + * + * @param iterator Training data (DataSetIterator). Iterator must support resetting + * @param numEpochs Number of training epochs, >= 1 + */ + public void fit(@NonNull MultiDataSetIterator iterator, int numEpochs) { + Preconditions.checkArgument(numEpochs > 0, "Number of epochs much be > 0. Got numEpochs = %s", + numEpochs); + Preconditions.checkArgument(numEpochs == 1 || iterator.resetSupported(), + "Cannot perform multiple epochs training using" + + "iterator has does not support resetting (iterator.resetSupported() returned false)"); + + for (int i = 0; i < numEpochs; i++) { + fit(iterator); + } + } + + /** + * Perform minibatch training on all minibatches in the MultiDataSetIterator.
Note: The + * MultiDataSets in the MultiDataSetIterator must have exactly 1 input and output array (as + * MultiLayerNetwork only supports 1 input and 1 output) + * + * @param iterator Training data (DataSetIterator). Iterator must support resetting + */ + @Override + public void fit(MultiDataSetIterator iterator) { + fit(new MultiDataSetWrapperIterator(iterator)); + } + + @Override + public T[] doEvaluation(MultiDataSetIterator iterator, T[] evaluations) { + return doEvaluation(new MultiDataSetWrapperIterator(iterator), evaluations); + } + + /** + * Evaluate the network (for classification) on the provided data set, with top N accuracy in + * addition to standard accuracy. For 'standard' accuracy evaluation only, use topN = 1 + * + * @param iterator Iterator (data) to evaluate on + * @param labelsList List of labels. May be null. + * @param topN N value for top N accuracy evaluation + * @return Evaluation object, summarizing the results of the evaluation on the provided + * DataSetIterator + */ + public Evaluation evaluate(DataSetIterator iterator, List labelsList, int topN) { + if (layers == null || !(getOutputLayer() instanceof IOutputLayer)) { + throw new IllegalStateException("Cannot evaluate network with no output layer"); + } + if (labelsList == null) { + try { + labelsList = iterator.getLabels(); + } catch (Throwable t) { + } //Ignore, maybe UnsupportedOperationException etc + } + + Layer outputLayer = getOutputLayer(); + if (getLayerWiseConfigurations().isValidateOutputLayerConfig()) { + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), + Evaluation.class); + } + + Evaluation e = new org.deeplearning4j.eval.Evaluation(labelsList, topN); + doEvaluation(iterator, e); + + return e; + } + + protected void update(Task task) { + if (!initDone) { + initDone = true; + Heartbeat heartbeat = Heartbeat.getInstance(); + task = ModelSerializer.taskByModel(this); + Environment env = EnvironmentUtils.buildEnvironment(); + heartbeat.reportEvent(Event.STANDALONE, env, task); + } + } + + /** + * String detailing the architecture of the multilayernetwork. Columns are LayerIndex with layer + * type, nIn, nOut, Total number of parameters and the Shapes of the parameters Will also give + * information about frozen layers, if any. + * + * @return Summary as a string + * @see #memoryInfo(int, InputType) + */ + public String summary() { + return summary(null); + } + + /** + * String detailing the architecture of the multilayernetwork. Will also display activation size + * when given an input type. Columns are LayerIndex with layer type, nIn, nOut, Total number of + * parameters, Shapes of the parameters, Input activation shape, Output activation shape Will also + * give information about frozen layers, if any. + * + * @return Summary as a string + * @see #memoryInfo(int, InputType) + */ + public String summary(InputType inputType) { + StringBuilder ret = new StringBuilder(); + ret.append("\n"); + + List lines = new ArrayList<>(); + if (inputType == null) { + lines.add(new String[]{"LayerName (LayerType)", "nIn,nOut", "TotalParams", "ParamsShape"}); + } else { + lines.add(new String[]{"LayerName (LayerType)", "nIn,nOut", "TotalParams", "ParamsShape", + "InputShape", "OutputShape"}); + } + int[] maxLength = new int[inputType == null ? 
4 : 6]; + String[] header = lines.get(0); + for (int i = 0; i < header.length; i++) { + maxLength[i] = header[i].length(); + } + + int frozenParams = 0; + for (org.deeplearning4j.nn.api.Layer currentLayer : getLayers()) { + String name = currentLayer.conf().getLayer().getLayerName(); + if (name == null) { + name = String.valueOf(currentLayer.getIndex()); + } + String paramShape = "-"; + String in = "-"; + String out = "-"; + String[] classNameArr = currentLayer.getClass().getName().split("\\."); + String className = classNameArr[classNameArr.length - 1]; + String paramCount = String.format("%,d", currentLayer.numParams()); + String inShape = ""; + String outShape = ""; + InputPreProcessor preProcessor; + InputType outType; + if (inputType != null) { + preProcessor = getLayerWiseConfigurations().getInputPreProcess(currentLayer.getIndex()); + inShape = inputType.toString(); + if (preProcessor != null) { + inputType = preProcessor.getOutputType(inputType); + inShape += "--> " + inputType.toString(); } - FeedForwardLayer ffl = (FeedForwardLayer) conf; - - if (ffl.getNIn() > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - return (int) ffl.getNIn(); - } - - /** - * Indicates whether some other object is "equal to" this one. - *

- * The {@code equals} method implements an equivalence relation - * on non-null object references: - *

    - *
  • It is reflexive: for any non-null reference value - * {@code x}, {@code x.equals(x)} should return - * {@code true}. - *
  • It is symmetric: for any non-null reference values - * {@code x} and {@code y}, {@code x.equals(y)} - * should return {@code true} if and only if - * {@code y.equals(x)} returns {@code true}. - *
  • It is transitive: for any non-null reference values - * {@code x}, {@code y}, and {@code z}, if - * {@code x.equals(y)} returns {@code true} and - * {@code y.equals(z)} returns {@code true}, then - * {@code x.equals(z)} should return {@code true}. - *
  • It is consistent: for any non-null reference values - * {@code x} and {@code y}, multiple invocations of - * {@code x.equals(y)} consistently return {@code true} - * or consistently return {@code false}, provided no - * information used in {@code equals} comparisons on the - * objects is modified. - *
  • For any non-null reference value {@code x}, - * {@code x.equals(null)} should return {@code false}. - *
- *

- * The {@code equals} method for class {@code Object} implements - * the most discriminating possible equivalence relation on objects; - * that is, for any non-null reference values {@code x} and - * {@code y}, this method returns {@code true} if and only - * if {@code x} and {@code y} refer to the same object - * ({@code x == y} has the value {@code true}). - *

- * Note that it is generally necessary to override the {@code hashCode} - * method whenever this method is overridden, so as to maintain the - * general contract for the {@code hashCode} method, which states - * that equal objects must have equal hash codes. - * - * @param obj the reference object with which to compare. - * @return {@code true} if this object is the same as the obj - * argument; {@code false} otherwise. - * @see #hashCode() - * @see HashMap - */ - @Override - public boolean equals(Object obj) { - if (obj == null) - return false; - if (obj instanceof MultiLayerNetwork) { - MultiLayerNetwork network = (MultiLayerNetwork) obj; - boolean paramsEquals = network.params().equals(params()); - boolean confEquals = getLayerWiseConfigurations().equals(network.getLayerWiseConfigurations()); - boolean updaterEquals = getUpdater().equals(network.getUpdater()); - return paramsEquals && confEquals && updaterEquals; + outType = currentLayer.conf().getLayer().getOutputType(currentLayer.getIndex(), inputType); + outShape = outType.toString(); + inputType = outType; + } + if (currentLayer.numParams() > 0) { + paramShape = ""; + if (currentLayer instanceof BidirectionalLayer) { // Bidirectional layer is not an FFL + BidirectionalLayer bi = (BidirectionalLayer) currentLayer; + in = String.valueOf(((Bidirectional) bi.conf().getLayer()).getNIn()); + out = String.valueOf(((Bidirectional) bi.conf().getLayer()).getNOut()); + } else { + try { + in = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNIn()); + out = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNOut()); + } catch ( + Exception e) { // Some layers, like PReLU, are just BaseLayers (but have parameters) + } } - return false; - } - - private void writeObject(ObjectOutputStream oos) throws IOException { - ModelSerializer.writeModel(this, oos, true); - } - - private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException { - val mln = ModelSerializer.restoreMultiLayerNetwork(ois, true); - - this.defaultConfiguration = mln.defaultConfiguration.clone(); - this.layerWiseConfigurations = mln.layerWiseConfigurations.clone(); - this.init(); - this.flattenedParams.assign(mln.flattenedParams); - - int numWorkingMem = 2 * (layerWiseConfigurations.getConfs().size() + layerWiseConfigurations.getInputPreProcessors().size()); - WS_LAYER_WORKING_MEM_CONFIG = getLayerWorkingMemWSConfig(numWorkingMem); - WS_LAYER_ACT_X_CONFIG = getLayerActivationWSConfig(layerWiseConfigurations.getConfs().size()); - - if (mln.getUpdater() != null && mln.getUpdater(false).getStateViewArray() != null) - this.getUpdater(true).getStateViewArray().assign(mln.getUpdater(false).getStateViewArray()); - } - - /** - * Close the network and deallocate all native memory, including: parameters, gradients, updater memory and workspaces - * Note that the network should not be used again for any purpose after it has been closed - */ - @Override - public void close(){ - //Close the INDArray and dealloc - if(flattenedParams.closeable()) - flattenedParams.close(); - - if(flattenedGradients != null && flattenedGradients.closeable()) - flattenedGradients.close(); - - Updater u = getUpdater(false); - if(u != null && u.getStateViewArray() != null) { - INDArray state = u.getStateViewArray(); - if(state.closeable()) - state.close(); + Set paraNames = currentLayer.paramTable().keySet(); + for (String aP : paraNames) { + String paramS = ArrayUtils.toString(currentLayer.paramTable().get(aP).shape()); + paramShape += aP + ":" + 
paramS + ", "; } + paramShape = paramShape.subSequence(0, paramShape.lastIndexOf(",")).toString(); + } + if (currentLayer instanceof FrozenLayer) { + frozenParams += currentLayer.numParams(); + classNameArr = ((FrozenLayer) currentLayer).getInsideLayer().getClass().getName() + .split("\\."); + className = "Frozen " + classNameArr[classNameArr.length - 1]; + } - Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); - System.gc(); + String[] line; + if (inputType == null) { + line = new String[]{name + " (" + className + ")", in + "," + out, paramCount, paramShape}; + } else { + line = new String[]{name + " (" + className + ")", in + "," + out, paramCount, paramShape, + inShape, outShape}; + } + for (int i = 0; i < line.length; i++) { + maxLength[i] = Math.max(maxLength[i], line[i] == null ? 0 : line[i].length()); + } + lines.add(line); } + + StringBuilder sbFormat = new StringBuilder(); + int totalLength = 0; + int pos = 0; + for (int length : maxLength) { + int currLength; + if (pos++ == maxLength.length - 1) { + currLength = length; + } else { + currLength = length + 3; + } + sbFormat.append("%-").append(currLength).append("s"); + totalLength += currLength; + } + sbFormat.append("\n"); + String format = sbFormat.toString(); + + ret.append(StringUtils.repeat("=", totalLength)) + .append("\n"); + + boolean first = true; + for (String[] line : lines) { + String formatted = String.format(format, (Object[]) line); + ret.append(formatted); + if (first) { + ret.append(StringUtils.repeat("=", totalLength)).append("\n"); + first = false; + } + } + + ret.append(StringUtils.repeat("-", totalLength)); + ret.append(String.format("\n%30s %,d", "Total Parameters: ", params().length())); + ret.append( + String.format("\n%30s %,d", "Trainable Parameters: ", params().length() - frozenParams)); + ret.append(String.format("\n%30s %,d", "Frozen Parameters: ", frozenParams)); + ret.append("\n"); + ret.append(StringUtils.repeat("=", totalLength)); + ret.append("\n"); + return ret.toString(); + } + + /** + * Generate information regarding memory use for the network, for the given input type and + * minibatch size. Note that when using workspaces or CuDNN, the network should be trained for + * some iterations so that the memory workspaces have time to initialize. Without this, the memory + * requirements during training may be underestimated. + *

+ * Note also that this is the same information that is generated during an OOM crash when
+ * training or performing inference.
+ *
+ * @param minibatch Minibatch size to estimate memory for
+ * @param inputType Input type to the network
+ * @return A String with information about network memory use
+ */
+ public String memoryInfo(int minibatch, InputType inputType) {
+ return CrashReportingUtil.generateMemoryStatus(this, minibatch, inputType);
+ }
+
+ /**
+ * Clear any state preserved within the layers (for example, RNN state and noise weight
+ * parameters).
+ */
+ public void clearLayersStates() {
+ for (Layer layer : layers) {
+ layer.clear();
+ layer.clearNoiseWeightParams();
+ }
+ }
+
+ /**
+ * Increment the epoch count (in the underlying {@link MultiLayerConfiguration}) by 1. Note that
+ * this is done automatically when using iterator-based fitting methods, such as
+ * {@link #fit(DataSetIterator)}. However, when using non-iterator fit methods (DataSet,
+ * INDArray/INDArray etc), the network has no way to know when one epoch ends and another starts.
+ * In such situations, this method can be used to increment the epoch counter. Note that the
+ * epoch counter is used for situations such as some learning rate schedules, and the like.
+ *

+ * The current epoch count can be obtained using + * {@code MultiLayerConfiguration.getLayerwiseConfiguration().getEpochCount()} + */ + public void incrementEpochCount() { + layerWiseConfigurations.setEpochCount(layerWiseConfigurations.getEpochCount() + 1); + synchronizeIterEpochCounts(); + } + + protected void synchronizeIterEpochCounts() { + //TODO: this is necessary for some schedules - but the redundant values are a little ugly... + int currIter = getIterationCount(); + int currEpoch = getEpochCount(); + for (Layer l : layers) { + l.setIterationCount(currIter); + l.setEpochCount(currEpoch); + } + } + + /** + * Save the MultiLayerNetwork to a file. Restore using {@link #load(File, boolean)}. Note that + * this saves the updater (i.e., the state array for momentum/Adam/rmsprop etc), which is + * desirable if further training will be undertaken. + * + * @param f File to save the network to + * @see ModelSerializer ModelSerializer for more details (and saving/loading via streams) + * @see #save(File, boolean) + */ + public void save(File f) throws IOException { + save(f, true); + } + + /** + * Save the MultiLayerNetwork to a file. Restore using {@link #load(File, boolean)}. + * + * @param f File to save the network to + * @param saveUpdater If true: save the updater (i.e., the state array for momentum/Adam/rmsprop + * etc), which should usually be saved if further training is required + * @see ModelSerializer ModelSerializer for more details (and saving/loading via streams) + * @see #save(File, boolean) + */ + public void save(File f, boolean saveUpdater) throws IOException { + ModelSerializer.writeModel(this, f, saveUpdater); + } + + /** + * Convert this MultiLayerNetwork to a ComputationGraph + * + * @return ComputationGraph equivalent to this network (including parameters and updater state) + */ + public ComputationGraph toComputationGraph() { + return NetworkUtils.toComputationGraph(this); + } + + /** + * Return a copy of the network with the parameters and activations set to use the specified + * (floating point) data type. If the existing datatype is the same as the requested dataype, the + * original network will be returned unchanged. Only floating point datatypes (DOUBLE, FLOAT, + * HALF) may be used. + * + * @param dataType Datatype to convert the network to + * @return The network, set to use the specified datatype for the parameters and activations + */ + public MultiLayerNetwork convertDataType(@NonNull DataType dataType) { + Preconditions.checkState(dataType.isFPType(), + "Invalid DataType: %s. Can only convert network to a floating point type", dataType); + if (dataType == params().dataType()) { + return this; + } + + try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + INDArray newParams = params().castTo(dataType); + String jsonConfig = getLayerWiseConfigurations().toJson(); + MultiLayerConfiguration newConf = MultiLayerConfiguration.fromJson(jsonConfig); + newConf.setDataType(dataType); + MultiLayerNetwork newNet = new MultiLayerNetwork(newConf); + newNet.init(newParams, false); + + Updater u = getUpdater(false); + if (u != null && u.getStateViewArray() != null) { + INDArray oldUpdaterState = u.getStateViewArray(); + newNet.getUpdater(true).getStateViewArray().assign(oldUpdaterState); + } + return newNet; + } + } + + /** + * Set the learning rate for all layers in the network to the specified value. Note that if any + * learning rate schedules are currently present, these will be removed in favor of the new + * (fixed) learning rate.
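For orientation, a minimal usage sketch of the reporting, persistence and conversion helpers above; the file name, input shape and fitting loop are illustrative assumptions, not part of this patch:

    import java.io.File;
    import java.io.IOException;
    import org.deeplearning4j.nn.conf.inputs.InputType;
    import org.deeplearning4j.nn.graph.ComputationGraph;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.nd4j.linalg.api.buffer.DataType;
    import org.nd4j.linalg.dataset.DataSet;

    public class MultiLayerNetworkUsageSketch {

        public static void run(MultiLayerNetwork net, DataSet data) throws IOException {
            // Memory report for a minibatch of 32 examples with 784 input features; train a few
            // iterations first so workspaces are initialized and the estimate is not too low.
            System.out.println(net.memoryInfo(32, InputType.feedForward(784)));

            // Fitting from a DataSet directly gives the network no way to detect epoch
            // boundaries, so the epoch counter is incremented manually.
            for (int epoch = 0; epoch < 3; epoch++) {
                net.fit(data);
                net.incrementEpochCount();
            }

            // Drop any state preserved inside the layers (e.g. RNN state, noise weight params).
            net.clearLayersStates();

            // Persist with the updater state so training can be resumed later.
            File f = new File("net.zip"); // illustrative path
            net.save(f, true);
            MultiLayerNetwork restored = MultiLayerNetwork.load(f, true);

            // A half-precision copy for inference; the original network is left unchanged.
            MultiLayerNetwork fp16 = restored.convertDataType(DataType.HALF);

            // The same network expressed as a ComputationGraph.
            ComputationGraph cg = restored.toComputationGraph();
        }
    }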
+ *
+ * Note: This method is not free from a performance point of view: a proper learning rate
+ * schedule should be used in preference to calling this method at every iteration.
+ *
+ * @param newLr New learning rate for all layers
+ * @see #setLearningRate(ISchedule)
+ * @see #setLearningRate(int, double)
+ */
+ public void setLearningRate(double newLr) {
+ NetworkUtils.setLearningRate(this, newLr);
+ }
+
+ /**
+ * Set the learning rate schedule for all layers in the network to the specified schedule. This
+ * schedule will replace any/all existing schedules, and also any fixed learning rate values.
+ * Note that the iteration/epoch counts will not be reset. Use + * {@link MultiLayerConfiguration#setIterationCount(int)} and + * {@link MultiLayerConfiguration#setEpochCount(int)} if this is required + * + * @param newLr New learning rate schedule for all layers + * @see #setLearningRate(ISchedule) + * @see #setLearningRate(int, double) + */ + public void setLearningRate(ISchedule newLr) { + NetworkUtils.setLearningRate(this, newLr); + } + + /** + * Set the learning rate for a single layer in the network to the specified value. Note that if + * any learning rate schedules are currently present, these will be removed in favor of the new + * (fixed) learning rate.
+ *
+ * Note: This method is not free from a performance point of view: a proper learning rate
+ * schedule should be used in preference to calling this method at every iteration. Note also
+ * that {@link #setLearningRate(double)} should be used in preference when all layers need to
+ * be set to a new LR.
+ *
+ * @param layerNumber Number of the layer to set the LR for
+ * @param newLr New learning rate for a single layer
+ * @see #setLearningRate(ISchedule)
+ * @see #setLearningRate(int, double)
+ */
+ public void setLearningRate(int layerNumber, double newLr) {
+ NetworkUtils.setLearningRate(this, layerNumber, newLr);
+ }
+
+ /**
+ * Set the learning rate schedule for a single layer in the network to the specified value.
+ * Note also that {@link #setLearningRate(ISchedule)} should be used in preference when all
+ * layers need to be set to a new LR schedule. This schedule will replace any/all existing
+ * schedules, and also any fixed learning rate values.
+ * Note that the iteration/epoch counts will not be reset. Use
+ * {@link MultiLayerConfiguration#setIterationCount(int)} and
+ * {@link MultiLayerConfiguration#setEpochCount(int)} if this is required.
+ *
+ * @param layerNumber Number of the layer to set the LR schedule for
+ * @param newLr New learning rate schedule for a single layer
+ * @see #setLearningRate(ISchedule)
+ * @see #setLearningRate(int, double)
+ */
+ public void setLearningRate(int layerNumber, ISchedule newLr) {
+ NetworkUtils.setLearningRate(this, layerNumber, newLr);
+ }
+
+ /**
+ * Get the current learning rate, for the specified layer, from the network. Note: If the layer
+ * has no learning rate (no parameters, or an updater without a learning rate) then null is
+ * returned.
+ *
+ * @param layerNumber Layer number to get the learning rate for
+ * @return Learning rate for the specified layer, or null
+ */
+ public Double getLearningRate(int layerNumber) {
+ return NetworkUtils.getLearningRate(this, layerNumber);
+ }
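A short sketch of how the learning-rate setters and getter above might be called; the schedule type and values are arbitrary examples:

    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.nd4j.linalg.schedule.ExponentialSchedule;
    import org.nd4j.linalg.schedule.ISchedule;
    import org.nd4j.linalg.schedule.ScheduleType;

    public class LearningRateSketch {

        public static void adjustLearningRates(MultiLayerNetwork net) {
            // Fixed learning rate for every layer; replaces any existing schedules.
            net.setLearningRate(1e-3);

            // Decaying schedule for all layers; iteration/epoch counts are not reset.
            ISchedule schedule = new ExponentialSchedule(ScheduleType.EPOCH, 1e-3, 0.95);
            net.setLearningRate(schedule);

            // Override a single layer, e.g. to fine-tune early layers more slowly.
            net.setLearningRate(0, 1e-4);

            // Null if the layer has no parameters or no learning rate.
            Double lrLayer1 = net.getLearningRate(1);
            System.out.println("LR of layer 1: " + lrLayer1);
        }
    }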
+
+ /**
+ * Return the layer size (number of units) for the specified layer. Note that the meaning of
+ * the "layer size" can depend on the type of layer. For example:
+ * - DenseLayer, OutputLayer, recurrent layers: number of units (nOut configuration option)
+ * - ConvolutionLayer: the number of output channels
+ * - Subsampling layers, global pooling layers, etc: size of 0 is always returned
+ *
+ * @param layer Index of the layer to get the size of. Must be in range 0 to nLayers-1 inclusive
+ * @return Size of the layer
+ */
+ public int layerSize(int layer) {
+ if (layer < 0 || layer >= layers.length) {
+ throw new IllegalArgumentException(
+ "Invalid layer index: " + layer + ". Layer index must be between 0 and "
+ + (layers.length - 1) + " inclusive");
+ }
+ org.deeplearning4j.nn.conf.layers.Layer conf = layers[layer].conf().getLayer();
+ if (conf == null || !(conf instanceof FeedForwardLayer)) {
+ return 0;
+ }
+ FeedForwardLayer ffl = (FeedForwardLayer) conf;
+
+ if (ffl.getNOut() > Integer.MAX_VALUE) {
+ throw new ND4JArraySizeException();
+ }
+ return (int) ffl.getNOut();
+ }
+
+ /**
+ * Return the input size (number of inputs) for the specified layer.
+ * Note that the meaning of the "input size" can depend on the type of layer. For example:
+ * - DenseLayer, OutputLayer, etc: the feature vector size (nIn configuration option)
+ * - Recurrent layers: the feature vector size per time step (nIn configuration option)
+ * - ConvolutionLayer: the number of input channels
+ * - Subsampling layers, global pooling layers, etc: size of 0 is always returned
+ *
+ * @param layer Index of the layer to get the input size of. Must be in range 0 to nLayers-1 inclusive
+ * @return Input size of the layer
+ */
+ public int layerInputSize(int layer) {
+ if (layer < 0 || layer >= layers.length) {
+ throw new IllegalArgumentException(
+ "Invalid layer index: " + layer + ". Layer index must be between 0 and "
+ + (layers.length - 1) + " inclusive");
+ }
+ org.deeplearning4j.nn.conf.layers.Layer conf = layers[layer].conf().getLayer();
+ if (conf == null || !(conf instanceof FeedForwardLayer)) {
+ return 0;
+ }
+ FeedForwardLayer ffl = (FeedForwardLayer) conf;
+
+ if (ffl.getNIn() > Integer.MAX_VALUE) {
+ throw new ND4JArraySizeException();
+ }
+ return (int) ffl.getNIn();
+ }
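As an illustration, the two accessors above could drive a compact per-layer shape overview (a sketch, assuming an initialized network):

    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;

    public class LayerSizeSketch {

        public static void printLayerSizes(MultiLayerNetwork net) {
            // layerInputSize/layerSize return 0 for layers without an nIn/nOut notion
            // (subsampling, global pooling, ...), so no special-casing is needed here.
            for (int i = 0; i < net.getnLayers(); i++) {
                System.out.printf("layer %d: in=%d, out=%d%n",
                        i, net.layerInputSize(i), net.layerSize(i));
            }
        }
    }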

+
+ /**
+ * Indicates whether some other object is "equal to" this one.
+ *
+ * The {@code equals} method implements an equivalence relation on non-null object references:
+ *
+ * • It is reflexive: for any non-null reference value {@code x}, {@code x.equals(x)} should
+ *   return {@code true}.
+ * • It is symmetric: for any non-null reference values {@code x} and {@code y},
+ *   {@code x.equals(y)} should return {@code true} if and only if {@code y.equals(x)} returns
+ *   {@code true}.
+ * • It is transitive: for any non-null reference values {@code x}, {@code y}, and {@code z}, if
+ *   {@code x.equals(y)} returns {@code true} and {@code y.equals(z)} returns {@code true}, then
+ *   {@code x.equals(z)} should return {@code true}.
+ * • It is consistent: for any non-null reference values {@code x} and {@code y}, multiple
+ *   invocations of {@code x.equals(y)} consistently return {@code true} or consistently return
+ *   {@code false}, provided no information used in {@code equals} comparisons on the objects is
+ *   modified.
+ * • For any non-null reference value {@code x}, {@code x.equals(null)} should return
+ *   {@code false}.
+ *
+ * The {@code equals} method for class {@code Object} implements the most discriminating
+ * possible equivalence relation on objects; that is, for any non-null reference values
+ * {@code x} and {@code y}, this method returns {@code true} if and only if {@code x} and
+ * {@code y} refer to the same object ({@code x == y} has the value {@code true}).
+ *
+ * Note that it is generally necessary to override the {@code hashCode} + * method whenever this method is overridden, so as to maintain the + * general contract for the {@code hashCode} method, which states + * that equal objects must have equal hash codes. + * + * @param obj the reference object with which to compare. + * @return {@code true} if this object is the same as the obj argument; {@code false} otherwise. + * @see #hashCode() + * @see HashMap + */ + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj instanceof MultiLayerNetwork) { + MultiLayerNetwork network = (MultiLayerNetwork) obj; + boolean paramsEquals = network.params().equals(params()); + boolean confEquals = getLayerWiseConfigurations().equals( + network.getLayerWiseConfigurations()); + boolean updaterEquals = getUpdater().equals(network.getUpdater()); + return paramsEquals && confEquals && updaterEquals; + } + return false; + } + + private void writeObject(ObjectOutputStream oos) throws IOException { + ModelSerializer.writeModel(this, oos, true); + } + + private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException { + val mln = ModelSerializer.restoreMultiLayerNetwork(ois, true); + + this.defaultConfiguration = mln.defaultConfiguration.clone(); + this.layerWiseConfigurations = mln.layerWiseConfigurations.clone(); + this.init(); + this.flattenedParams.assign(mln.flattenedParams); + + int numWorkingMem = 2 * (layerWiseConfigurations.getConfs().size() + + layerWiseConfigurations.getInputPreProcessors().size()); + WS_LAYER_WORKING_MEM_CONFIG = getLayerWorkingMemWSConfig(numWorkingMem); + WS_LAYER_ACT_X_CONFIG = getLayerActivationWSConfig(layerWiseConfigurations.getConfs().size()); + + if (mln.getUpdater() != null && mln.getUpdater(false).getStateViewArray() != null) { + this.getUpdater(true).getStateViewArray().assign(mln.getUpdater(false).getStateViewArray()); + } + } + + /** + * Close the network and deallocate all native memory, including: parameters, gradients, updater + * memory and workspaces Note that the network should not be used again for any purpose after it + * has been closed + */ + @Override + public void close() { + //Close the INDArray and dealloc + if (flattenedParams.closeable()) { + flattenedParams.close(); + } + + if (flattenedGradients != null && flattenedGradients.closeable()) { + flattenedGradients.close(); + } + + Updater u = getUpdater(false); + if (u != null && u.getStateViewArray() != null) { + INDArray state = u.getStateViewArray(); + if (state.closeable()) { + state.close(); + } + } + + Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); + System.gc(); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java index 52ae7c891..b941cf636 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java @@ -572,7 +572,7 @@ public class TransferLearning { */ public GraphBuilder(ComputationGraph origGraph) { this.origGraph = origGraph; - this.origConfig = origGraph.getConfiguration().clone(); + this.origConfig = origGraph.getComputationGraphConfiguration().clone(); } /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java 
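A brief sketch combining the equality check and explicit close() above with the getComputationGraphConfiguration() accessor that the remaining hunks rename throughout; the class and usage here are illustrative assumptions, not part of the patch:

    import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
    import org.deeplearning4j.nn.graph.ComputationGraph;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;

    public class EqualityAndCloseSketch {

        public static void compareAndRelease(MultiLayerNetwork a, MultiLayerNetwork b, ComputationGraph cg) {
            // equals() compares parameters, layer-wise configuration and updater state.
            System.out.println("Networks equal: " + a.equals(b));

            // Accessor as renamed by this patch (previously getConfiguration()).
            ComputationGraphConfiguration conf = cg.getComputationGraphConfiguration();
            System.out.println("Graph epoch count: " + conf.getEpochCount());

            // Deallocate native memory once the network is no longer needed; the
            // instance must not be used again afterwards.
            b.close();
        }
    }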
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java index f6f3a35c1..a6f7d6c4f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java @@ -242,7 +242,7 @@ public class TransferLearningHelper { } Set frozenInputVerticesSorted = new HashSet<>(); - frozenInputVerticesSorted.addAll(origGraph.getConfiguration().getNetworkInputs()); + frozenInputVerticesSorted.addAll(origGraph.getComputationGraphConfiguration().getNetworkInputs()); frozenInputVerticesSorted.removeAll(allFrozen); //remove input vertices - just to add back in a predictable order for (String existingInput : frozenInputVerticesSorted) { @@ -328,7 +328,7 @@ public class TransferLearningHelper { String anInput = graphInputs.get(i); if (origGraph.getVertex(anInput).isInputVertex()) { //was an original input to the graph - int inputIndex = origGraph.getConfiguration().getNetworkInputs().indexOf(anInput); + int inputIndex = origGraph.getComputationGraphConfiguration().getNetworkInputs().indexOf(anInput); featuresNow[i] = origGraph.getInput(inputIndex); } else { //needs to be grabbed from the internal activations diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java index 4f4d1690f..91d24de46 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java @@ -385,14 +385,14 @@ public abstract class BaseMultiLayerUpdater implements Updater /** * Pre-apply: Apply gradient normalization/clipping * - * @param layer Layer to apply gradient normalization/clipping for + * @param layer ILayer to apply gradient normalization/clipping for * @param gradient Gradient to update * @param iteration The current iteration (i.e., number of parameter updates so far) */ public void preApply(Trainable layer, Gradient gradient, int iteration) { if (layer.getConfig() == null || layer.numParams() == 0) { - //Layer does not have parameters -> no gradient + //ILayer does not have parameters -> no gradient return; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java index 7c96fd750..81a2d8465 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java @@ -54,7 +54,7 @@ public interface TrainingListener { * only at training time * * @param model Model - * @param activations Layer activations (including input) + * @param activations ILayer activations (including input) */ void onForwardPass(Model model, List activations); @@ -63,7 +63,7 @@ public interface TrainingListener { * only at training time * * @param model Model - * @param activations Layer activations (including input) + * @param activations ILayer activations (including input) */ void onForwardPass(Model model, Map activations); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java index 4ebf2e050..550e4425b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java @@ -247,7 +247,7 @@ public class CheckpointListener extends BaseTrainingListener implements Serializ if (model instanceof MultiLayerNetwork) { return ((MultiLayerNetwork) model).getLayerWiseConfigurations().getIterationCount(); } else if (model instanceof ComputationGraph) { - return ((ComputationGraph) model).getConfiguration().getIterationCount(); + return ((ComputationGraph) model).getComputationGraphConfiguration().getIterationCount(); } else { return model.conf().getIterationCount(); } @@ -257,7 +257,7 @@ public class CheckpointListener extends BaseTrainingListener implements Serializ if (model instanceof MultiLayerNetwork) { return ((MultiLayerNetwork) model).getLayerWiseConfigurations().getEpochCount(); } else if (model instanceof ComputationGraph) { - return ((ComputationGraph) model).getConfiguration().getEpochCount(); + return ((ComputationGraph) model).getComputationGraphConfiguration().getEpochCount(); } else { return model.conf().getEpochCount(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java index 3a8bfee10..42ce490e5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java @@ -336,7 +336,7 @@ public abstract class BaseOptimizer implements ConvexOptimizer { if (model instanceof MultiLayerNetwork) { return ((MultiLayerNetwork) model).getLayerWiseConfigurations().getIterationCount(); } else if (model instanceof ComputationGraph) { - return ((ComputationGraph) model).getConfiguration().getIterationCount(); + return ((ComputationGraph) model).getComputationGraphConfiguration().getIterationCount(); } else { return model.conf().getIterationCount(); } @@ -347,7 +347,7 @@ public abstract class BaseOptimizer implements ConvexOptimizer { MultiLayerConfiguration conf = ((MultiLayerNetwork) model).getLayerWiseConfigurations(); conf.setIterationCount(conf.getIterationCount() + incrementBy); } else if (model instanceof ComputationGraph) { - ComputationGraphConfiguration conf = ((ComputationGraph) model).getConfiguration(); + ComputationGraphConfiguration conf = ((ComputationGraph) model).getComputationGraphConfiguration(); conf.setIterationCount(conf.getIterationCount() + incrementBy); } else { model.conf().setIterationCount(model.conf().getIterationCount() + incrementBy); @@ -358,7 +358,7 @@ public abstract class BaseOptimizer implements ConvexOptimizer { if (model instanceof MultiLayerNetwork) { return ((MultiLayerNetwork) model).getLayerWiseConfigurations().getEpochCount(); } else if (model instanceof ComputationGraph) { - return ((ComputationGraph) model).getConfiguration().getEpochCount(); + return ((ComputationGraph) model).getComputationGraphConfiguration().getEpochCount(); } else { return model.conf().getEpochCount(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java index 32c40bdfc..53bed93a2 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java @@ -79,7 +79,7 @@ public class Convolution1DUtils { * @return the format for the layer */ public static RNNFormat getRnnFormatFromLayer(Layer layer) { - Preconditions.checkState(hasRnnDataFormat(layer),"Layer of type " + layer.getClass().getName() + " and name " + layer.getLayerName() + " does not have an RNNFormat"); + Preconditions.checkState(hasRnnDataFormat(layer),"ILayer of type " + layer.getClass().getName() + " and name " + layer.getLayerName() + " does not have an RNNFormat"); if(layer instanceof SimpleRnn) { SimpleRnn simpleRnn = (SimpleRnn) layer; return simpleRnn.getRnnDataFormat(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java index ac28ced80..5227ad77f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java @@ -320,12 +320,12 @@ public class CrashReportingUtil { appendHelperInformation(sb, mln.getLayers()); appendActivationShapes(mln, (inputTypes == null || inputTypes.length == 0 ? null : inputTypes[0]), minibatch, sb, bytesPerElement); } else { - sb.append(f("Backprop Type", cg.getConfiguration().getBackpropType())); - if(cg.getConfiguration().getBackpropType() == BackpropType.TruncatedBPTT){ - sb.append(f("TBPTT Length", cg.getConfiguration().getTbpttFwdLength() + "/" + cg.getConfiguration().getTbpttBackLength())); + sb.append(f("Backprop Type", cg.getComputationGraphConfiguration().getBackpropType())); + if(cg.getComputationGraphConfiguration().getBackpropType() == BackpropType.TruncatedBPTT){ + sb.append(f("TBPTT Length", cg.getComputationGraphConfiguration().getTbpttFwdLength() + "/" + cg.getComputationGraphConfiguration().getTbpttBackLength())); } - sb.append(f("Workspace Mode: Training", cg.getConfiguration().getTrainingWorkspaceMode())); - sb.append(f("Workspace Mode: Inference", cg.getConfiguration().getInferenceWorkspaceMode())); + sb.append(f("Workspace Mode: Training", cg.getComputationGraphConfiguration().getTrainingWorkspaceMode())); + sb.append(f("Workspace Mode: Inference", cg.getComputationGraphConfiguration().getInferenceWorkspaceMode())); appendLayerInformation(sb, cg.getLayers(), bytesPerElement); appendHelperInformation(sb, cg.getLayers()); appendActivationShapes(cg, sb, bytesPerElement); @@ -461,13 +461,13 @@ public class CrashReportingUtil { List l = new ArrayList<>(layerClasses.keySet()); Collections.sort(l); sb.append(f("Number of Layers", layers.length)); - sb.append("Layer Counts\n"); + sb.append("ILayer Counts\n"); for(String s : l){ sb.append(" ").append(f(s, layerClasses.get(s))); } - sb.append("Layer Parameter Breakdown\n"); + sb.append("ILayer Parameter Breakdown\n"); String format = " %-3s %-20s %-20s %-20s %-20s"; - sb.append(String.format(format, "Idx", "Name", "Layer Type", "Layer # Parameters", "Layer Parameter Memory")).append("\n"); + sb.append(String.format(format, "Idx", "Name", "ILayer Type", "ILayer # Parameters", "ILayer Parameter Memory")).append("\n"); for(Layer layer : layers){ long numParams = layer.numParams(); sb.append(String.format(format, layer.getIndex(), layer.conf().getLayer().getLayerName(), @@ -477,13 +477,13 @@ public class CrashReportingUtil { } private static void 
appendHelperInformation(StringBuilder sb, org.deeplearning4j.nn.api.Layer[] layers){ - sb.append("\n----- Layer Helpers - Memory Use -----\n"); + sb.append("\n----- ILayer Helpers - Memory Use -----\n"); int helperCount = 0; long helperWithMemCount = 0L; long totalHelperMem = 0L; - //Layer index, layer name, layer class, helper class, total memory, breakdown + //ILayer index, layer name, layer class, helper class, total memory, breakdown String format = "%-3s %-20s %-25s %-30s %-12s %s"; boolean header = false; for(Layer l : layers){ @@ -509,7 +509,7 @@ public class CrashReportingUtil { if(!header){ - sb.append(String.format(format, "#", "Layer Name", "Layer Class", "Helper Class", "Total Memory", "Memory Breakdown")) + sb.append(String.format(format, "#", "ILayer Name", "ILayer Class", "Helper Class", "Total Memory", "Memory Breakdown")) .append("\n"); header = true; } @@ -551,7 +551,7 @@ public class CrashReportingUtil { sb.append(f("Input Shape", Arrays.toString(inputShape))); List inputTypes = net.getLayerWiseConfigurations().getLayerActivationTypes(inputType); String format = "%-3s %-20s %-20s %-42s %-20s %-12s %-12s"; - sb.append(String.format(format, "Idx", "Name", "Layer Type", "Activations Type", "Activations Shape", + sb.append(String.format(format, "Idx", "Name", "ILayer Type", "Activations Type", "Activations Shape", "# Elements", "Memory")).append("\n"); org.deeplearning4j.nn.api.Layer[] layers = net.getLayers(); long totalActivationBytes = 0; @@ -598,11 +598,11 @@ public class CrashReportingUtil { for( int i=0; i inputTypes = net.getConfiguration().getLayerActivationTypes(inputType); + Map inputTypes = net.getComputationGraphConfiguration().getLayerActivationTypes(inputType); GraphIndices indices = net.calculateIndices(); String format = "%-3s %-20s %-20s %-42s %-20s %-12s %-12s"; - sb.append(String.format(format, "Idx", "Name", "Layer Type", "Activations Type", "Activations Shape", + sb.append(String.format(format, "Idx", "Name", "ILayer Type", "Activations Type", "Activations Shape", "# Elements", "Memory")).append("\n"); org.deeplearning4j.nn.api.Layer[] layers = net.getLayers(); long totalActivationBytes = 0; @@ -633,7 +633,7 @@ public class CrashReportingUtil { sb.append(String.format(format, i, layerName, className, it, Arrays.toString(shape), (numElements < 0 ? 
"" : String.valueOf(numElements)), fBytes(bytes))).append("\n"); - if(!net.getConfiguration().getNetworkOutputs().contains(layerName)){ + if(!net.getComputationGraphConfiguration().getNetworkOutputs().contains(layerName)){ totalExOutput += bytes; } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java index ae7e2e2df..e636334fd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java @@ -141,7 +141,7 @@ public class ModelSerializer { if (model instanceof MultiLayerNetwork) { json = ((MultiLayerNetwork) model).getLayerWiseConfigurations().toJson(); } else if (model instanceof ComputationGraph) { - json = ((ComputationGraph) model).getConfiguration().toJson(); + json = ((ComputationGraph) model).getComputationGraphConfiguration().toJson(); } ZipEntry config = new ZipEntry(CONFIGURATION_JSON); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java index 7ed0a4bcb..4348be74a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java @@ -199,7 +199,7 @@ public class NetworkUtils { * Note: If the layer has no learning rate (no parameters, or an updater without a learning rate) then null is returned * * @param net Network - * @param layerNumber Layer number to get the learning rate for + * @param layerNumber ILayer number to get the learning rate for * @return Learning rate for the specified layer, or null */ public static Double getLearningRate(MultiLayerNetwork net, int layerNumber) { @@ -321,13 +321,13 @@ public class NetworkUtils { * Note: If the layer has no learning rate (no parameters, or an updater without a learning rate) then null is returned * * @param net Network - * @param layerName Layer name to get the learning rate for + * @param layerName ILayer name to get the learning rate for * @return Learning rate for the specified layer, or null */ public static Double getLearningRate(ComputationGraph net, String layerName) { Layer l = net.getLayer(layerName).conf().getLayer(); - int iter = net.getConfiguration().getIterationCount(); - int epoch = net.getConfiguration().getEpochCount(); + int iter = net.getComputationGraphConfiguration().getIterationCount(); + int epoch = net.getComputationGraphConfiguration().getEpochCount(); if (l instanceof BaseLayer) { BaseLayer bl = (BaseLayer) l; IUpdater u = bl.getIUpdater(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java index 08a3d086a..fb3d9ea64 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java @@ -68,7 +68,7 @@ public class OutputLayerUtil { * * If the specified layer is not an output layer, this is a no-op * @param layerName Name of the layer - * @param layer Layer + * @param layer ILayer */ public static void validateOutputLayer(String layerName, Layer layer){ IActivation activation; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java index df4583cd8..eb5814b49 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java @@ -440,7 +440,7 @@ public class TimeSeriesUtils { /** * Get the {@link RNNFormat} from the RNN layer, accounting for the presence of wrapper layers like Bidirectional, * LastTimeStep, etc - * @param layer Layer to get the RNNFormat from + * @param layer ILayer to get the RNNFormat from */ public static RNNFormat getFormatFromRnnLayer(Layer layer){ if(layer instanceof BaseRecurrentLayer){ diff --git a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java new file mode 100644 index 000000000..06c322a57 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java @@ -0,0 +1,127 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.api; + +import static net.brutex.ai.dnn.api.dnn.*; +import static org.junit.jupiter.api.Assertions.*; + +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.commons.lang3.RandomUtils; +import org.deeplearning4j.datasets.iterator.FloatsDataSetIterator; +import org.deeplearning4j.nn.conf.CacheMode; +import org.deeplearning4j.nn.conf.GradientNormalization; +import org.deeplearning4j.nn.conf.Updater; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.ActivationLayer; +import org.deeplearning4j.nn.weights.WeightInitXavier; +import org.junit.jupiter.api.Test; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.activations.impl.ActivationIdentity; +import org.nd4j.linalg.activations.impl.ActivationLReLU; +import org.nd4j.linalg.learning.config.Adam; + + +class dnnTest { + + @Test + void testFFLayer() { + int numFeatures = 128; + int batchSize = 10; + int numRows = 1000; + AtomicInteger cnt = new AtomicInteger(0); + FloatsDataSetIterator iterator = new FloatsDataSetIterator(floatIterable(numRows, numFeatures), batchSize); + + assertTrue(iterator.hasNext()); + + /** + * MultiLayerConfiguration confxx = new NeuralNetConfiguration.Builder() + * .seed(42) + * .updater(UPDATER) + * .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) + * .gradientNormalizationThreshold(GRADIENT_THRESHOLD) + * .weightInit(WeightInit.XAVIER) + * .activation(Activation.IDENTITY) + * .list(genLayers()) + * .setInputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) + * // .inputPreProcessor("CNN1", new 
FeedForwardToCnnPreProcessor(Y_DIM, X_DIM, CHANNELS)) + * .build(); + */ + + /** + * new DenseLayer.Builder().nIn(INPUT).nOut(X_DIM*Y_DIM*CHANNELS).weightInit(WeightInit.NORMAL).build(), + * new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), + * new DenseLayer.Builder().nIn(X_DIM*Y_DIM*CHANNELS).nOut(X_DIM*Y_DIM).build(), + * new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), + * new DenseLayer.Builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM).build(), + * new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), + * new DenseLayer.Builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM*CHANNELS).activation(Activation.TANH) + */ + dnn.conf() + .seed(42) + .updater( Adam.builder().learningRate(0.0002).beta1(0.5).build() ) + .gradientNormalization( GradientNormalization.RenormalizeL2PerLayer) + .gradientNormalizationThreshold( 100 ) + .weightInit( new WeightInitXavier() ) + .activation( new ActivationIdentity() ) + .inputType( InputType.convolutional( 28, 28, 1)) + .layer( dnn.DenseLayer(10,30).build() ) + .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build() ) + + ; + + + } + + protected static Iterable> floatIterable(final int totalRows, final int numColumns) { + return new Iterable>() { + @Override + public Iterator> iterator() { + return new Iterator>() { + private final AtomicInteger cnt = new AtomicInteger(0); + + @Override + public boolean hasNext() { + return cnt.incrementAndGet() <= totalRows; + } + + @Override + public Pair next() { + float[] features = new float[numColumns]; + float[] labels = new float[numColumns]; + for (int i = 0; i < numColumns; i++) { + features[i] = (float) i; + labels[i] = RandomUtils.nextFloat(0, 5); + } + return Pair.makePair(features, labels); + } + + @Override + public void remove() { + // no-op + } + }; + } + }; + } + +} \ No newline at end of file diff --git a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/conf/layer/FFLayerTest.java b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/conf/layer/FFLayerTest.java new file mode 100644 index 000000000..2fa944000 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/conf/layer/FFLayerTest.java @@ -0,0 +1,47 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. 
+ * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.conf.layer; + +import net.brutex.ai.dnn.api.IModel; +import net.brutex.ai.dnn.api.INeuralNetworkConfiguration; +import net.brutex.ai.dnn.api.ILayerConfiguration; +import org.junit.jupiter.api.Test; + +class FFLayerTest { + + @Test + void instantiate() { + ILayerConfiguration ff_conf = FeedForwardLayerConfiguration.builder().build(); + INeuralNetworkConfiguration net_conf = net.brutex.ai.dnn.conf.NeuralNetworkConfiguration.builder() + .layerConfiguration(ff_conf) + .build(); + IModel network = net.brutex.ai.dnn.impl.network.NeuralNetwork.builder().name("Test Network") + .configuration(net_conf) + .build(); + ff_conf.instantiate(network); + + } + + @Test + void getOutputType() { + } +} \ No newline at end of file diff --git a/cavis-dnn/cavis-dnn-nn/src/test/java/org/deeplearning4j/nn/layers/HelperUtilsTest.java b/cavis-dnn/cavis-dnn-nn/src/test/java/org/deeplearning4j/nn/layers/HelperUtilsTest.java index bd05f187f..a3d21fb0c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/test/java/org/deeplearning4j/nn/layers/HelperUtilsTest.java +++ b/cavis-dnn/cavis-dnn-nn/src/test/java/org/deeplearning4j/nn/layers/HelperUtilsTest.java @@ -34,7 +34,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; /** */ -@DisplayName("Activation Layer Test") +@DisplayName("Activation ILayer Test") public class HelperUtilsTest extends BaseDL4JTest { @Override diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java index 20dcd51d9..9f32446ae 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java @@ -29,7 +29,6 @@ import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.parallelism.inference.InferenceMode; import org.deeplearning4j.parallelism.inference.LoadBalanceMode; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.exception.ND4JIllegalStateException; @@ -195,7 +194,7 @@ public class InplaceParallelInference extends ParallelInference { for (int e = 0; e < workers; e++) { if (sourceModel instanceof ComputationGraph) { // building configuration with shared parameters - val model = new ComputationGraph(ComputationGraphConfiguration.fromJson(((ComputationGraph) sourceModel).getConfiguration().toJson())); + val model = new ComputationGraph(ComputationGraphConfiguration.fromJson(((ComputationGraph) sourceModel).getComputationGraphConfiguration().toJson())); model.init(params, false); Nd4j.getExecutioner().commit(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java index 52a28606e..8547e7b9f 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java +++ 
b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java @@ -458,7 +458,7 @@ public class ParallelInference { if (protoModel instanceof ComputationGraph) { if (!rootDevice) { this.replicatedModel = new ComputationGraph(ComputationGraphConfiguration - .fromJson(((ComputationGraph) protoModel).getConfiguration().toJson())); + .fromJson(((ComputationGraph) protoModel).getComputationGraphConfiguration().toJson())); this.replicatedModel.init(); synchronized (locker) { diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java index a1909795a..be706234f 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java @@ -329,7 +329,7 @@ public class DefaultTrainer extends Thread implements Trainer { } else if (originalModel instanceof ComputationGraph) { if (!onRootModel) { ComputationGraphConfiguration conf = ComputationGraphConfiguration - .fromJson(((ComputationGraph) originalModel).getConfiguration().toJson()); + .fromJson(((ComputationGraph) originalModel).getComputationGraphConfiguration().toJson()); conf.setTrainingWorkspaceMode(workspaceMode); this.replicatedModel = new ComputationGraph(conf); @@ -354,7 +354,7 @@ public class DefaultTrainer extends Thread implements Trainer { } else { this.replicatedModel = originalModel; this.replicatedModel.init(); - ((ComputationGraph) replicatedModel).getConfiguration().setTrainingWorkspaceMode(workspaceMode); + ((ComputationGraph) replicatedModel).getComputationGraphConfiguration().setTrainingWorkspaceMode(workspaceMode); } } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java index 67b120ddf..e460ddc2f 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java @@ -102,7 +102,7 @@ public class SparkComputationGraph extends SparkListenable { TrainingMaster trainingMaster) { sc = javaSparkContext; this.trainingMaster = trainingMaster; - this.conf = network.getConfiguration().clone(); + this.conf = network.getComputationGraphConfiguration().clone(); this.network = network; this.network.init(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java index 3fa3312d7..b7da3d143 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java @@ -56,7 +56,7 @@ public class CGVaeReconstructionErrorWithKeyFunction extends BaseVaeScoreWith if (!(l instanceof 
VariationalAutoencoder)) { throw new RuntimeException( "Cannot use CGVaeReconstructionErrorWithKeyFunction on network that doesn't have a VAE " - + "layer as layer 0. Layer type: " + l.getClass()); + + "layer as layer 0. ILayer type: " + l.getClass()); } return (VariationalAutoencoder) l; } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java index a71912367..43defe37f 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java @@ -58,7 +58,7 @@ public class CGVaeReconstructionProbWithKeyFunction extends BaseVaeReconstruc if (!(l instanceof VariationalAutoencoder)) { throw new RuntimeException( "Cannot use CGVaeReconstructionProbWithKeyFunction on network that doesn't have a VAE " - + "layer as layer 0. Layer type: " + l.getClass()); + + "layer as layer 0. ILayer type: " + l.getClass()); } return (VariationalAutoencoder) l; } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java index e1c2f760d..a0bcca02b 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java @@ -59,7 +59,7 @@ public class VaeReconstructionErrorWithKeyFunction extends BaseVaeScoreWithKe if (!(l instanceof VariationalAutoencoder)) { throw new RuntimeException( "Cannot use VaeReconstructionErrorWithKeyFunction on network that doesn't have a VAE " - + "layer as layer 0. Layer type: " + l.getClass()); + + "layer as layer 0. ILayer type: " + l.getClass()); } return (VariationalAutoencoder) l; } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java index 12fbbbeb6..d65084dc5 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java @@ -59,7 +59,7 @@ public class VaeReconstructionProbWithKeyFunction extends BaseVaeReconstructi if (!(l instanceof VariationalAutoencoder)) { throw new RuntimeException( "Cannot use VaeReconstructionProbWithKeyFunction on network that doesn't have a VAE " - + "layer as layer 0. Layer type: " + l.getClass()); + + "layer as layer 0. 
ILayer type: " + l.getClass()); } return (VariationalAutoencoder) l; } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java index 3a2170bc3..4a0252b28 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java @@ -292,7 +292,7 @@ public class ParameterAveragingTrainingMaster @Override public ParameterAveragingTrainingWorker getWorkerInstance(SparkComputationGraph graph) { - NetBroadcastTuple tuple = new NetBroadcastTuple(graph.getNetwork().getConfiguration(), + NetBroadcastTuple tuple = new NetBroadcastTuple(graph.getNetwork().getComputationGraphConfiguration(), graph.getNetwork().params(), graph.getNetwork().getUpdater().getStateViewArray()); if (collectTrainingStats) @@ -731,7 +731,7 @@ public class ParameterAveragingTrainingMaster int numUpdates = averagingFrequency; conf.setIterationCount(conf.getIterationCount() + numUpdates); } else { - ComputationGraphConfiguration conf = graph.getNetwork().getConfiguration(); + ComputationGraphConfiguration conf = graph.getNetwork().getComputationGraphConfiguration(); int numUpdates = averagingFrequency; conf.setIterationCount(conf.getIterationCount() + numUpdates); } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java index 887696af3..c899fae04 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java @@ -118,7 +118,7 @@ public class TestFrozenLayers extends BaseSparkTest { boolean isFrozen = entry.getKey().startsWith("0_") || entry.getKey().startsWith("1_"); if (isFrozen) { - //Layer should be frozen -> no change + //ILayer should be frozen -> no change assertEquals(orig, now, entry.getKey()); } else { //Not frozen -> should be different @@ -195,7 +195,7 @@ public class TestFrozenLayers extends BaseSparkTest { boolean isFrozen = entry.getKey().startsWith("0_") || entry.getKey().startsWith("1_"); if (isFrozen) { - //Layer should be frozen -> no change + //ILayer should be frozen -> no change assertEquals(orig, now, entry.getKey()); } else { //Not frozen -> should be different diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java index 48a30034a..c2c24a617 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java @@ -835,12 +835,12 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { 
JavaRDD rdd = sc.parallelize(list); - assertEquals(0, sparkNet.getNetwork().getConfiguration().getIterationCount()); + assertEquals(0, sparkNet.getNetwork().getComputationGraphConfiguration().getIterationCount()); sparkNet.fit(rdd); - assertEquals(minibatchesPerWorkerPerEpoch, sparkNet.getNetwork().getConfiguration().getIterationCount()); + assertEquals(minibatchesPerWorkerPerEpoch, sparkNet.getNetwork().getComputationGraphConfiguration().getIterationCount()); sparkNet.fit(rdd); assertEquals(2 * minibatchesPerWorkerPerEpoch, - sparkNet.getNetwork().getConfiguration().getIterationCount()); + sparkNet.getNetwork().getComputationGraphConfiguration().getIterationCount()); sparkNet.getTrainingMaster().deleteTempFiles(sc); } @@ -1076,11 +1076,11 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { for(int i=0; i<3; i++ ){ assertEquals(i, sn1.getNetwork().getLayerWiseConfigurations().getEpochCount()); - assertEquals(i, sn2.getNetwork().getConfiguration().getEpochCount()); + assertEquals(i, sn2.getNetwork().getComputationGraphConfiguration().getEpochCount()); sn1.fit(rdd); sn2.fit(rdd); assertEquals(i+1, sn1.getNetwork().getLayerWiseConfigurations().getEpochCount()); - assertEquals(i+1, sn2.getNetwork().getConfiguration().getEpochCount()); + assertEquals(i+1, sn2.getNetwork().getComputationGraphConfiguration().getEpochCount()); } } } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java index a9e2a213b..7e521f0c1 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java @@ -375,8 +375,8 @@ public class SharedTrainingWrapper { ((MultiLayerNetwork) model).setIterationCount(ModelParameterServer.getInstance().getStartPosition().getFirst()); ((MultiLayerNetwork) model).setEpochCount(ModelParameterServer.getInstance().getStartPosition().getSecond()); } else if (originalModel instanceof ComputationGraph) { - ((ComputationGraph) model).getConfiguration().setIterationCount(ModelParameterServer.getInstance().getStartPosition().getFirst()); - ((ComputationGraph) model).getConfiguration().setEpochCount(ModelParameterServer.getInstance().getStartPosition().getSecond()); + ((ComputationGraph) model).getComputationGraphConfiguration().setIterationCount(ModelParameterServer.getInstance().getStartPosition().getFirst()); + ((ComputationGraph) model).getComputationGraphConfiguration().setEpochCount(ModelParameterServer.getInstance().getStartPosition().getSecond()); } // if we're going to extend iteratation for debugging purposes - let's do that here @@ -421,7 +421,7 @@ public class SharedTrainingWrapper { // ok. 
             // ok. attaching accumulator to model
             if (model instanceof ComputationGraph) {
-                ((ComputationGraph) originalModel).getConfiguration()
+                ((ComputationGraph) originalModel).getComputationGraphConfiguration()
                         .setTrainingWorkspaceMode(trainingConfiguration.getWorkspaceMode());
                 ((ComputationGraph) originalModel).setGradientsAccumulator(accumulator);
             } else if (model instanceof MultiLayerNetwork) {
diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java
index f0b6bc151..1a11d70a5 100644
--- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java
+++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java
@@ -295,7 +295,7 @@ public class SharedTrainingMaster extends BaseTrainingMaster
 layerNames = getlayerNames();
 for (String s : layerNames) {
@@ -728,7 +728,7 @@ public class SbeStatsReport implements StatsReport, AgronaPersistable {
             pne.next().paramName(s);
         }
-        //Layer names
+        //ILayer names
         List layerNames = getlayerNames();
         UpdateEncoder.LayerNamesEncoder lne = ue.layerNamesCount(layerNames.size());
         for (String s : layerNames) {
diff --git a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java
index ff6f00901..274e670f6 100644
--- a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java
+++ b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java
@@ -182,7 +182,7 @@ public class TrainModuleUtils {
             long inputSize = (i == 0 ? va.getNIn() : encLayerSizes[i - 1]);
             long outputSize = encLayerSizes[i];
             encoderInfo.put("Input Size", String.valueOf(inputSize));
-            encoderInfo.put("Layer Size", String.valueOf(outputSize));
+            encoderInfo.put("ILayer Size", String.valueOf(outputSize));
             encoderInfo.put("Num Parameters", String.valueOf((inputSize + 1) * outputSize));
             encoderInfo.put("Activation Function", va.getActivationFn().toString());
             layerInfo.add(encoderInfo);
@@ -197,7 +197,7 @@ public class TrainModuleUtils {
         long inputSize = encLayerSizes[encLayerSizes.length - 1];
         long outputSize = va.getNOut();
         latentInfo.put("Input Size", String.valueOf(inputSize));
-        latentInfo.put("Layer Size", String.valueOf(outputSize));
+        latentInfo.put("ILayer Size", String.valueOf(outputSize));
         latentInfo.put("Num Parameters", String.valueOf((inputSize + 1) * outputSize * 2));
         latentInfo.put("Activation Function", va.getPzxActivationFn().toString());
         layerInfo.add(latentInfo);
@@ -216,7 +216,7 @@ public class TrainModuleUtils {
             inputSize = (i == 0 ? va.getNOut() : decLayerSizes[i - 1]);
             outputSize = decLayerSizes[i];
             decoderInfo.put("Input Size", String.valueOf(inputSize));
-            decoderInfo.put("Layer Size", String.valueOf(outputSize));
+            decoderInfo.put("ILayer Size", String.valueOf(outputSize));
             decoderInfo.put("Num Parameters", String.valueOf((inputSize + 1) * outputSize));
             decoderInfo.put("Activation Function", va.getActivationFn().toString());
             layerInfo.add(decoderInfo);
@@ -231,7 +231,7 @@
         inputSize = decLayerSizes[decLayerSizes.length - 1];
         outputSize = va.getNIn();
         reconstructionInfo.put("Input Size", String.valueOf(inputSize));
-        reconstructionInfo.put("Layer Size", String.valueOf(outputSize));
+        reconstructionInfo.put("ILayer Size", String.valueOf(outputSize));
         reconstructionInfo.put("Num Parameters", String
                 .valueOf((inputSize + 1) * va.getOutputDistribution().distributionInputSize((int) va.getNIn())));
         reconstructionInfo.put("Distribution", va.getOutputDistribution().toString());
diff --git a/cavis-ui/cavis-ui-vertx/src/main/resources/templates/TrainingModel.html.ftl b/cavis-ui/cavis-ui-vertx/src/main/resources/templates/TrainingModel.html.ftl
index 859aae287..51d63af6b 100644
--- a/cavis-ui/cavis-ui-vertx/src/main/resources/templates/TrainingModel.html.ftl
+++ b/cavis-ui/cavis-ui-vertx/src/main/resources/templates/TrainingModel.html.ftl
@@ -103,7 +103,7 @@
- +
@@ -179,7 +179,7 @@
- +
@@ -244,7 +244,7 @@
- +
diff --git a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java
index a61ae386d..44d9dff3c 100644
--- a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java
+++ b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java
@@ -65,7 +65,7 @@ public class TestUtils {
         ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
         ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais, true);
-        assertEquals(net.getConfiguration(), restored.getConfiguration());
+        assertEquals(net.getComputationGraphConfiguration(), restored.getComputationGraphConfiguration());
         assertEquals(net.params(), restored.params());
         return restored;
diff --git a/settings.gradle b/settings.gradle
index 80b29bef8..d7875c751 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -100,7 +100,7 @@ include ':cavis-dnn:cavis-dnn-data:cavis-dnn-data-utility-iterators'
 include ':cavis-dnn:cavis-dnn-modelimport'
 include ':cavis-dnn:cavis-dnn-nlp'
 include ':cavis-dnn:cavis-dnn-nn'
-include ':cavis-dnn:cavis-dnn-nn-api'
+//include ':cavis-dnn:cavis-dnn-nn-api'
 include ':cavis-dnn:cavis-dnn-nn-parent'
 include ':cavis-dnn:cavis-dnn-nn-parent:cavis-dnn-nn-server'
 include ':cavis-dnn:cavis-dnn-nn-parent:cavis-dnn-nn-client'
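The hunks above consistently migrate ComputationGraph call sites from getConfiguration() to getComputationGraphConfiguration(). A minimal sketch of the renamed accessor in use, assuming only the classes and methods already shown in this patch; the helper class itself is hypothetical and for illustration only:

    // Hypothetical helper, not part of this patch: it exercises the renamed
    // accessor the same way the updated training-master and test code does.
    import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
    import org.deeplearning4j.nn.graph.ComputationGraph;

    class ConfigurationAccessSketch {
        static void bumpIterationCount(ComputationGraph net, int numUpdates) {
            // getComputationGraphConfiguration() replaces the former getConfiguration()
            ComputationGraphConfiguration conf = net.getComputationGraphConfiguration();
            conf.setIterationCount(conf.getIterationCount() + numUpdates);
        }
    }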