diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java index c03d9f5c2..aba07ef0d 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java @@ -47,6 +47,7 @@ import org.datavec.image.transform.ShowImageTransform; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ActivationLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -54,9 +55,11 @@ import org.deeplearning4j.nn.conf.layers.DropoutLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop; +import org.deeplearning4j.nn.conf.weightnoise.WeightNoise; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.weights.WeightInitXavier; +import org.deeplearning4j.optimize.listeners.PerformanceListener; import org.deeplearning4j.optimize.listeners.ScoreToChartListener; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; @@ -181,6 +184,7 @@ public class App { .gradientNormalization( GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold( 100 ) //.weightInitFn( new WeightInitXavier() ) //this is internal + .weightNoise(new WeightNoise(new NormalDistribution(0.5, 0.5))) .weightInit( WeightInit.XAVIER) //.activationFn( new ActivationIdentity()) //this is internal .activation( Activation.IDENTITY ) @@ -232,10 +236,10 @@ public class App { copyParams(gen, dis, gan); - //gen.setListeners(new PerformanceListener(10, true)); - //dis.setListeners(new PerformanceListener(10, true)); - //gan.setListeners(new PerformanceListener(10, true)); - gan.setListeners(new ScoreToChartListener("gan")); + gen.addTrainingListeners(new PerformanceListener(10, true)); + dis.addTrainingListeners(new PerformanceListener(10, true)); + gan.addTrainingListeners(new PerformanceListener(10, true)); + gan.addTrainingListeners(new ScoreToChartListener("gan")); //dis.setListeners(new ScoreToChartListener("dis")); gan.fit(Nd4j.rand(batchSize, CHANNELS, X_DIM, Y_DIM), Nd4j.zeros(batchSize, 1)); @@ -322,23 +326,25 @@ public class App { int genLayerCount = gen.getLayers().length; for (int i = 0; i < gan.getLayers().length; i++) { if (i < genLayerCount) { - gen.getLayer(i).setParams(gan.getLayer(i).params()); + if(gan.getLayer(i).getParams() != null) + gen.getLayer(i).setParams(gan.getLayer(i).getParams()); } else { - dis.getLayer(i - genLayerCount).setParams(gan.getLayer(i).params()); + if(gan.getLayer(i).getParams() != null) + dis.getLayer(i - genLayerCount).setParams(gan.getLayer(i).getParams()); } } } private static void updateGen(MultiLayerNetwork gen, MultiLayerNetwork gan) { for (int i = 0; i < gen.getLayers().length; i++) { - gen.getLayer(i).setParams(gan.getLayer(i).params()); + gen.getLayer(i).setParams(gan.getLayer(i).getParams()); } } private static void updateGan(MultiLayerNetwork gen, MultiLayerNetwork dis, MultiLayerNetwork gan) { int genLayerCount = gen.getLayers().length; for (int i = genLayerCount; i < gan.getLayers().length; i++) { - 
gan.getLayer(i).setParams(dis.getLayer(i - genLayerCount).params()); + gan.getLayer(i).setParams(dis.getLayer(i - genLayerCount).getParams()); } } diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/GAN.java b/brutex-extended-tests/src/test/java/net/brutex/gan/GAN.java index b1e780d59..41eb277d7 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/GAN.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/GAN.java @@ -115,15 +115,15 @@ public class GAN { public void setGeneratorListeners(BaseTrainingListener[] listeners) { - generator.setListeners(listeners); + generator.addTrainingListeners(listeners); } public void setDiscriminatorListeners(BaseTrainingListener[] listeners) { - discriminator.setListeners(listeners); + discriminator.addTrainingListeners(listeners); } public void setGanListeners(BaseTrainingListener[] listeners) { - gan.setListeners(listeners); + gan.addTrainingListeners(listeners); } public void fit(DataSetIterator realData, int numEpochs) { @@ -239,9 +239,9 @@ public class GAN { int genLayerCount = generator.getLayers().length; for (int i = 0; i < gan.getLayers().length; i++) { if (i < genLayerCount) { - generator.getLayer(i).setParams(gan.getLayer(i).params()); + generator.getLayer(i).setParams(gan.getLayer(i).getParams()); } else { - discriminator.getLayer(i - genLayerCount).setParams(gan.getLayer(i).params()); + discriminator.getLayer(i - genLayerCount).setParams(gan.getLayer(i).getParams()); } } } @@ -252,7 +252,7 @@ public class GAN { */ private void updateGeneratorFromGan() { for (int i = 0; i < generator.getLayers().length; i++) { - generator.getLayer(i).setParams(gan.getLayer(i).params()); + generator.getLayer(i).setParams(gan.getLayer(i).getParams()); } } @@ -263,7 +263,7 @@ public class GAN { private void updateGanWithDiscriminator() { int genLayerCount = generator.getLayers().length; for (int i = genLayerCount; i < gan.getLayers().length; i++) { - gan.getLayer(i).setParams(discriminator.getLayer(i - genLayerCount).params()); + gan.getLayer(i).setParams(discriminator.getLayer(i - genLayerCount).getParams()); } } diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java index 07e6a148a..4dd171fea 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java @@ -155,8 +155,8 @@ public class MnistDCGANExample { .updater(new RmsProp.Builder().learningRate(0.0008).rmsDecay(1e-8).build()) .build(); - gan.getGenerator().setListeners(new PerformanceListener(1, true)); - gan.getDiscriminator().setListeners(new PerformanceListener(1, true)); + gan.getGenerator().addTrainingListeners(new PerformanceListener(1, true)); + gan.getDiscriminator().addTrainingListeners(new PerformanceListener(1, true)); Nd4j.getMemoryManager().setAutoGcWindow(15 * 1000); diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java index c2d6f739c..db8a74ae7 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java @@ -205,7 +205,7 @@ public class TestServer2 { //PostgresStatsStorage psqlStore = new PostgresStatsStorage(); int listenerFrequency = 2; //net.setListeners(new StatsListener(psqlStore, listenerFrequency), new StatsListener(statsStorage, listenerFrequency), 
new ScoreIterationListener(200)); - net.setListeners(new StatsListener(statsStorage, listenerFrequency), new ScoreIterationListener(200)); + net.addTrainingListeners(new StatsListener(statsStorage, listenerFrequency), new ScoreIterationListener(200)); //Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java index 0842ebfd4..8775bfc2e 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java @@ -290,7 +290,7 @@ public class IntegrationTestBaselineGenerator { for (int i : layersToTrain) { mln.pretrainLayer(i, dsi); } - paramsPostTraining = mln.params(); + paramsPostTraining = mln.getModelParams(); } else if (modelType == ModelType.CG) { String[] layersToTrain = tc.getUnsupervisedTrainLayersCG(); Preconditions.checkState(layersToTrain != null, "ILayer names must not be null"); @@ -298,7 +298,7 @@ public class IntegrationTestBaselineGenerator { for (String i : layersToTrain) { cg.pretrainLayer(i, iter); } - paramsPostTraining = cg.params(); + paramsPostTraining = cg.getModelParams(); } else { throw new UnsupportedOperationException("SameDiff not supported for unsupervised training tests"); } @@ -314,7 +314,7 @@ public class IntegrationTestBaselineGenerator { CollectScoresListener l = new CollectScoresListener(1); if (modelType != ModelType.SAMEDIFF) - m.setListeners(l); + m.addTrainingListeners(l); History h = null; if (modelType == ModelType.MLN) { @@ -349,7 +349,7 @@ public class IntegrationTestBaselineGenerator { } } else { File p = new File(testBaseDir, IntegrationTestRunner.PARAMS_POST_TRAIN_FILENAME); - IntegrationTestRunner.write(m.params(), p); + IntegrationTestRunner.write(m.getModelParams(), p); } } } diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java index e68751c1b..786c6d6b9 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java @@ -191,7 +191,7 @@ public class IntegrationTestRunner { MultiLayerNetwork loaded = MultiLayerNetwork.load(savedModel, true); assertEquals(loaded.getNetConfiguration(), mln.getNetConfiguration(), "Configs not equal"); - assertEquals( loaded.params(), mln.params(), "Params not equal"); + assertEquals( loaded.getModelParams(), mln.getModelParams(), "Params not equal"); assertEquals( loaded.getParamTable(), mln.getParamTable(), "Param table not equal"); } else if(config instanceof ComputationGraphConfiguration ){ ComputationGraphConfiguration cgc = (ComputationGraphConfiguration) config; @@ -201,7 +201,7 @@ public class IntegrationTestRunner { ComputationGraph loaded = ComputationGraph.load(savedModel, true); assertEquals(loaded.getComputationGraphConfiguration(), cg.getComputationGraphConfiguration(), "Configs not equal" ); - assertEquals( loaded.params(), cg.params(), "Params not equal"); + assertEquals( loaded.getModelParams(), cg.getModelParams(), "Params not equal"); assertEquals(loaded.getParamTable(), 
cg.getParamTable(), "Param table not equal"); } else if(config instanceof SameDiff){ sd = (SameDiff)config; @@ -389,7 +389,7 @@ public class IntegrationTestRunner { for( int i : layersToTrain){ mln.pretrainLayer(i, dsi); } - paramsPostTraining = mln.params(); + paramsPostTraining = mln.getModelParams(); layers = mln.getLayers(); } else if(modelType == ModelType.CG) { String[] layersToTrain = tc.getUnsupervisedTrainLayersCG(); @@ -398,7 +398,7 @@ public class IntegrationTestRunner { for( String i : layersToTrain){ cg.pretrainLayer(i, iter); } - paramsPostTraining = cg.params(); + paramsPostTraining = cg.getModelParams(); layers = cg.getLayers(); } else { throw new UnsupportedOperationException("Unsupported layerwise pretraining not supported for SameDiff models"); @@ -439,7 +439,7 @@ public class IntegrationTestRunner { CountingMultiDataSetIterator countingIter = new CountingMultiDataSetIterator(trainData, isTbptt, tbpttLength); CollectScoresListener l = new CollectScoresListener(1); if(modelType != ModelType.SAMEDIFF) { - m.setListeners(l); + m.addTrainingListeners(l); } int iterBefore; @@ -519,10 +519,10 @@ public class IntegrationTestRunner { if(modelType != ModelType.SAMEDIFF) { File p = new File(testBaseDir, IntegrationTestRunner.PARAMS_POST_TRAIN_FILENAME); INDArray paramsExp = read(p); - INDArray z = exceedsRelError(m.params(), paramsExp, tc.getMaxRelativeErrorParamsPostTraining(), tc.getMinAbsErrorParamsPostTraining()); + INDArray z = exceedsRelError(m.getModelParams(), paramsExp, tc.getMaxRelativeErrorParamsPostTraining(), tc.getMinAbsErrorParamsPostTraining()); int count = z.sumNumber().intValue(); if (count > 0) { - logFailedParams(20, "Parameter", layers, z, paramsExp, m.params()); + logFailedParams(20, "Parameter", layers, z, paramsExp, m.getModelParams()); } assertEquals( 0, count, "Number of params exceeded max relative error"); } else { @@ -607,12 +607,12 @@ public class IntegrationTestRunner { ModelSerializer.writeModel(m, f, true); MultiLayerNetwork restored = MultiLayerNetwork.load(f, true); assertEquals(mln.getNetConfiguration(), restored.getNetConfiguration()); - assertEquals(mln.params(), restored.params()); + assertEquals(mln.getModelParams(), restored.getModelParams()); } else if(modelType == ModelType.CG){ ModelSerializer.writeModel(m, f, true); ComputationGraph restored = ComputationGraph.load(f, true); assertEquals(cg.getComputationGraphConfiguration(), restored.getComputationGraphConfiguration()); - assertEquals(cg.params(), restored.params()); + assertEquals(cg.getModelParams(), restored.getModelParams()); } else { sd.save(f, true); SameDiff restored = SameDiff.load(f, true); diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java index 5bdae5d39..60e314d71 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java @@ -49,7 +49,7 @@ public class TestUtils { restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); assertEquals(net.getNetConfiguration(), restored.getNetConfiguration()); - assertEquals(net.params(), restored.params()); + assertEquals(net.getModelParams(), restored.getModelParams()); } catch (IOException e){ //Should never happen throw new RuntimeException(e); @@ -74,7 +74,7 @@ public class TestUtils { restored = ModelSerializer.restoreComputationGraph(bais, true); 
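(Editor's aside, not part of the patch: the serialization checks above and below all follow the same save/reload pattern, only with the params() accessor renamed to getModelParams(). A minimal sketch of that pattern for a MultiLayerNetwork follows; the helper name roundTrip and the usual JUnit/ModelSerializer imports are assumptions, not code from this patch.)

    // Illustrative only -- mirrors the round-trip assertions touched by this patch.
    public static MultiLayerNetwork roundTrip(MultiLayerNetwork net) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ModelSerializer.writeModel(net, baos, true);                     // save network + updater state
        ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
        MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
        assertEquals(net.getNetConfiguration(), restored.getNetConfiguration());
        assertEquals(net.getModelParams(), restored.getModelParams());  // formerly net.params()
        return restored;
    }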
assertEquals(net.getComputationGraphConfiguration(), restored.getComputationGraphConfiguration()); - assertEquals(net.params(), restored.params()); + assertEquals(net.getModelParams(), restored.getModelParams()); } catch (IOException e){ //Should never happen throw new RuntimeException(e); diff --git a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationIdentity.java b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationIdentity.java index 46124c636..0a2c48fee 100644 --- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationIdentity.java +++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationIdentity.java @@ -26,7 +26,7 @@ import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.BaseActivationFunction; import org.nd4j.linalg.api.ndarray.INDArray; -/** +/** The ActivationIdentity activation function, just returns the input as is. * f(x) = x */ @EqualsAndHashCode(callSuper = false) diff --git a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/BaseWorkspaceMgr.java b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/BaseWorkspaceMgr.java index 9baf97578..a0f45a6d1 100644 --- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/BaseWorkspaceMgr.java +++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/BaseWorkspaceMgr.java @@ -195,7 +195,7 @@ public abstract class BaseWorkspaceMgr> implements WorkspaceMg } @Override - public INDArray validateArrayLocation(@NonNull T arrayType, @NonNull INDArray array, boolean migrateIfInvalid, boolean exceptionIfDetached) { + public INDArray validateArrayLocation(T arrayType, INDArray array, boolean migrateIfInvalid, boolean exceptionIfDetached) { validateConfig(arrayType); if(scopeOutOfWs.contains(arrayType)){ diff --git a/cavis-dnn/cavis-dnn-core/build.gradle b/cavis-dnn/cavis-dnn-core/build.gradle index 18c322532..e40b8482f 100644 --- a/cavis-dnn/cavis-dnn-core/build.gradle +++ b/cavis-dnn/cavis-dnn-core/build.gradle @@ -19,6 +19,7 @@ dependencies { testImplementation projects.cavisNative.cavisNativeCommon testImplementation projects.cavisNd4j.cavisNd4jCommonTests testImplementation projects.cavisDnn.cavisDnnCommonTests + testImplementation projects.cavisDnn.cavisDnnNn implementation "org.apache.commons:commons-lang3" diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java index db11f8cc7..308b7c7ad 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java @@ -116,7 +116,7 @@ public class LayerHelperValidationUtil { MultiLayerNetwork net2With = new MultiLayerNetwork(netOrig.getNetConfiguration().clone()); net2With.init(); - net2With.params().assign(netOrig.params()); + net2With.getModelParams().assign(netOrig.getModelParams()); log.info("Removing all except for specified helpers from network copy 2: " + t.getAllowHelpersForClasses()); removeHelpers(net2With.getLayers(), t.getAllowHelpersForClasses()); @@ -124,7 +124,7 @@ public class LayerHelperValidationUtil { Preconditions.checkNotNull(t.getFeatures(), "Features are not set (null)"); for (boolean train : new boolean[]{false, true}) { - assertEquals(net1NoHelper.params(), net2With.params()); + assertEquals(net1NoHelper.getModelParams(), 
net2With.getModelParams()); String s = "Feed forward test - " + t.getTestName() + " - " + (train ? "Train: " : "Test: "); List ff1; try { @@ -180,7 +180,7 @@ public class LayerHelperValidationUtil { double maxRE = relError.maxNumber().doubleValue(); log.info(s + "Output, max relative error: " + maxRE); - assertEquals(net1NoHelper.params(), net2With.params()); //Check that forward pass does not modify params + assertEquals(net1NoHelper.getModelParams(), net2With.getModelParams()); //Check that forward pass does not modify params assertTrue(maxRE < t.getMaxRelError(), s + "Max RE: " + maxRE); } } @@ -255,24 +255,24 @@ public class LayerHelperValidationUtil { net2With = new MultiLayerNetwork(netOrig.getNetConfiguration().clone()); net2With.init(); - net2With.params().assign(netOrig.params()); + net2With.getModelParams().assign(netOrig.getModelParams()); log.info("Removing all except for specified layer helpers from network copy 2: " + t.getAllowHelpersForClasses()); removeHelpers(net2With.getLayers(), t.getAllowHelpersForClasses()); CollectScoresListener listener = new CollectScoresListener(1); - net2With.setListeners(listener); + net2With.addTrainingListeners(listener); net2With.fit(t.getData()); for( int i=0; i<2; i++ ) { net2With = new MultiLayerNetwork(netOrig.getNetConfiguration().clone()); net2With.init(); - net2With.params().assign(netOrig.params()); + net2With.getModelParams().assign(netOrig.getModelParams()); log.info("Removing all except for specified layer helpers from network copy 2: " + t.getAllowHelpersForClasses()); removeHelpers(net2With.getLayers(), t.getAllowHelpersForClasses()); CollectScoresListener listener2 = new CollectScoresListener(1); - net2With.setListeners(listener2); + net2With.addTrainingListeners(listener2); net2With.fit(t.getData()); DoubleArrayList listOrig = listener.getListScore(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java index 374724ae5..495b21e18 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java @@ -25,7 +25,7 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.convolution.ConvolutionLayer; @@ -67,7 +67,7 @@ public class TestUtils { restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); assertEquals(net.getNetConfiguration(), restored.getNetConfiguration()); - assertEquals(net.params(), restored.params()); + assertEquals(net.getModelParams(), restored.getModelParams()); } catch (IOException e){ //Should never happen throw new RuntimeException(e); @@ -91,7 +91,7 @@ public class TestUtils { restored = ModelSerializer.restoreComputationGraph(bais, true); assertEquals(net.getComputationGraphConfiguration(), restored.getComputationGraphConfiguration()); - assertEquals(net.params(), restored.params()); + assertEquals(net.getModelParams(), restored.getModelParams()); } catch (IOException e){ //Should never happen throw new RuntimeException(e); @@ -205,8 +205,8 @@ public class TestUtils { 
return null; } - public static L2Regularization getL2Reg(BaseLayer baseLayer){ - return getL2Reg(baseLayer.getRegularization()); + public static L2Regularization getL2Reg(BaseLayerConfiguration baseLayerConfiguration){ + return getL2Reg(baseLayerConfiguration.getRegularization()); } public static L2Regularization getL2Reg(List l){ @@ -218,7 +218,7 @@ public class TestUtils { return null; } - public static WeightDecay getWeightDecayReg(BaseLayer bl){ + public static WeightDecay getWeightDecayReg(BaseLayerConfiguration bl){ return getWeightDecayReg(bl.getRegularization()); } @@ -231,7 +231,7 @@ public class TestUtils { return null; } - public static double getL1(BaseLayer layer) { + public static double getL1(BaseLayerConfiguration layer) { List l = layer.getRegularization(); return getL1(l); } @@ -246,7 +246,7 @@ public class TestUtils { return l1Reg.getL1().valueAt(0,0); } - public static double getL2(BaseLayer layer) { + public static double getL2(BaseLayerConfiguration layer) { List l = layer.getRegularization(); return getL2(l); } @@ -269,7 +269,7 @@ public class TestUtils { return getL2(layer.getRegularization()); } - public static double getWeightDecay(BaseLayer layer) { + public static double getWeightDecay(BaseLayerConfiguration layer) { return getWeightDecayReg(layer.getRegularization()).getCoeff().valueAt(0,0); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java index f391f35f9..be740689b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java @@ -32,7 +32,6 @@ import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -183,7 +182,7 @@ public class DataSetIteratorTest extends BaseDL4JTest { MultiLayerNetwork model = new MultiLayerNetwork(builder.build()); model.init(); - model.setListeners(new ScoreIterationListener(listenerFreq)); + model.addTrainingListeners(new ScoreIterationListener(listenerFreq)); model.fit(lfw.next()); @@ -247,7 +246,7 @@ public class DataSetIteratorTest extends BaseDL4JTest { //model.setListeners(Arrays.asList((TrainingListener) new ScoreIterationListener(listenerFreq))); CollectScoresIterationListener listener = new CollectScoresIterationListener(listenerFreq); - model.setListeners(listener); + model.addTrainingListeners(listener); model.fit(cifar); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java index 12e17fa3a..0923ba407 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java @@ -226,7 +226,7 @@ public class TestEarlyStopping extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); 
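(Editor's aside, not part of the patch: the recurring listener change in the test hunks below is the rename of setListeners(...)/addListeners(...) to addTrainingListeners(...), which still accepts a varargs list of listeners. A minimal hypothetical sketch of the new call; 'conf' stands for any NeuralNetConfiguration built as in these tests.)

    // Illustrative only -- attaching listeners through the renamed API.
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.addTrainingListeners(
            new ScoreIterationListener(1),       // log the score every iteration
            new PerformanceListener(10, true));  // log timing/throughput every 10 iterations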
MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); @@ -255,7 +255,7 @@ public class TestEarlyStopping extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); MultipleEpochsIterator mIter = new MultipleEpochsIterator(10, irisIter); @@ -304,7 +304,7 @@ public class TestEarlyStopping extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); @@ -343,7 +343,7 @@ public class TestEarlyStopping extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); @@ -386,7 +386,7 @@ public class TestEarlyStopping extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); @@ -430,7 +430,7 @@ public class TestEarlyStopping extends BaseDL4JTest { .build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); int nSamples = 100; //Generate the training data INDArray x = Nd4j.linspace(-10, 10, nSamples).reshape(nSamples, 1); @@ -473,7 +473,7 @@ public class TestEarlyStopping extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); MultipleEpochsIterator mIter = new MultipleEpochsIterator(10, irisIter); @@ -496,9 +496,9 @@ public class TestEarlyStopping extends BaseDL4JTest { assertEquals(net.getnLayers(), mln.getnLayers()); assertEquals(net.getNetConfiguration().getOptimizationAlgo(), mln.getNetConfiguration().getOptimizationAlgo()); - BaseLayer bl = (BaseLayer) net.getLayerConfiguration(); - assertEquals(bl.getActivationFn().toString(), ((BaseLayer) mln.getLayerConfiguration()).getActivationFn().toString()); - assertEquals(bl.getIUpdater(), ((BaseLayer) mln.getLayerConfiguration()).getIUpdater()); + BaseLayerConfiguration bl = (BaseLayerConfiguration) net.getLayerConfiguration(); + assertEquals(bl.getActivationFn().toString(), ((BaseLayerConfiguration) mln.getLayerConfiguration()).getActivationFn().toString()); + assertEquals(bl.getIUpdater(), ((BaseLayerConfiguration) mln.getLayerConfiguration()).getIUpdater()); } @Test @@ -511,7 +511,7 
@@ public class TestEarlyStopping extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); @@ -792,7 +792,7 @@ public class TestEarlyStopping extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); TestListener tl = new TestListener(); - net.setListeners(tl); + net.addTrainingListeners(tl); DataSetIterator irisIter = new IrisDataSetIterator(50, 150); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java index fb55e2957..22b739f89 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java @@ -84,7 +84,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); @@ -128,7 +128,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); @@ -165,7 +165,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); @@ -207,7 +207,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); @@ -241,7 +241,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(150, 150); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); @@ -538,7 +538,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { ComputationGraph net = new ComputationGraph(conf); TestEarlyStopping.TestListener tl 
= new TestEarlyStopping.TestListener(); - net.setListeners(tl); + net.addTrainingListeners(tl); DataSetIterator irisIter = new IrisDataSetIterator(50, 150); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java index 8b5f5d46b..04d6f440f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java @@ -84,7 +84,7 @@ public class EvalTest extends BaseDL4JTest { // Instantiate model MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); - model.addListeners(new ScoreIterationListener(1)); + model.addTrainingListeners(new ScoreIterationListener(1)); // Train-test split DataSetIterator iter = new IrisDataSetIterator(150, 150); @@ -324,7 +324,7 @@ public class EvalTest extends BaseDL4JTest { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - net2.setParams(net1.params()); + net2.setParams(net1.getModelParams()); for(boolean useMask : new boolean[]{false, true}) { @@ -405,7 +405,7 @@ public class EvalTest extends BaseDL4JTest { ComputationGraph net2 = new ComputationGraph(conf2); net2.init(); - net2.setParams(net1.params()); + net2.setParams(net1.getModelParams()); for (boolean useMask : new boolean[]{false, true}) { @@ -492,7 +492,7 @@ public class EvalTest extends BaseDL4JTest { DataSetIterator iter = new IrisDataSetIterator(30, 150); DataSetIterator iterTest = new IrisDataSetIterator(30, 150); - net.setListeners(new EvaluativeListener(iterTest, 3)); + net.addTrainingListeners(new EvaluativeListener(iterTest, 3)); for( int i=0; i<3; i++ ){ net.fit(iter); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java index 5e6ed72bd..0380ed2a0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java @@ -26,7 +26,6 @@ import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -219,11 +218,11 @@ public class BNGradientCheckTest extends BaseDL4JTest { mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); - double scoreBefore = mln.score(); + double scoreBefore = mln.getScore(); for (int k = 0; k < 20; k++) mln.fit(ds); mln.computeGradientAndScore(); - double scoreAfter = mln.score(); + double scoreAfter = mln.getScore(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" @@ -323,11 +322,11 @@ public class BNGradientCheckTest extends BaseDL4JTest { mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); - double scoreBefore = mln.score(); + 
double scoreBefore = mln.getScore(); for (int k = 0; k < 10; k++) mln.fit(ds); mln.computeGradientAndScore(); - double scoreAfter = mln.score(); + double scoreAfter = mln.getScore(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" @@ -554,11 +553,11 @@ public class BNGradientCheckTest extends BaseDL4JTest { net.setInput(0, ds.getFeatures()); net.setLabels(ds.getLabels()); net.computeGradientAndScore(); - double scoreBefore = net.score(); + double scoreBefore = net.getScore(); for (int k = 0; k < 20; k++) net.fit(ds); net.computeGradientAndScore(); - double scoreAfter = net.score(); + double scoreAfter = net.getScore(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java index bee788e55..d11bd33c6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java @@ -27,7 +27,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -120,11 +119,11 @@ public class CNNGradientCheckTest extends BaseDL4JTest { mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); - double scoreBefore = mln.score(); + double scoreBefore = mln.getScore(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); - double scoreAfter = mln.score(); + double scoreAfter = mln.getScore(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation @@ -212,11 +211,11 @@ public class CNNGradientCheckTest extends BaseDL4JTest { mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); - double scoreBefore = mln.score(); + double scoreBefore = mln.getScore(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); - double scoreAfter = mln.score(); + double scoreAfter = mln.getScore(); //Can't test in 'characteristic mode of operation' if not learning String msg = testName + "- score did not (sufficiently) decrease during learning - activationFn=" diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java index 6cefb32aa..39dc54659 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java @@ -105,11 +105,11 @@ public class GradientCheckTests extends BaseDL4JTest { 
mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); - double scoreBefore = mln.score(); + double scoreBefore = mln.getScore(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); - double scoreAfter = mln.score(); + double scoreAfter = mln.getScore(); //Can't test in 'characteristic mode of operation' if not learning String msg = "testMinibatchApplication() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation @@ -184,11 +184,11 @@ public class GradientCheckTests extends BaseDL4JTest { mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); - double scoreBefore = mln.score(); + double scoreBefore = mln.getScore(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); - double scoreAfter = mln.score(); + double scoreAfter = mln.getScore(); //Can't test in 'characteristic mode of operation' if not learning String msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation @@ -278,11 +278,11 @@ public class GradientCheckTests extends BaseDL4JTest { mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); - double scoreBefore = mln.score(); + double scoreBefore = mln.getScore(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); - double scoreAfter = mln.score(); + double scoreAfter = mln.getScore(); //Can't test in 'characteristic mode of operation' if not learning String msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation @@ -452,11 +452,11 @@ public class GradientCheckTests extends BaseDL4JTest { mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); - double scoreBefore = mln.score(); + double scoreBefore = mln.getScore(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); - double scoreAfter = mln.score(); + double scoreAfter = mln.getScore(); //Can't test in 'characteristic mode of operation' if not learning msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation @@ -523,13 +523,13 @@ public class GradientCheckTests extends BaseDL4JTest { netGraph.setInputs(features); netGraph.setLabels(labels); netGraph.computeGradientAndScore(); - double scoreBefore = netGraph.score(); + double scoreBefore = netGraph.getScore(); String msg; for (int epoch = 0; epoch < 5; epoch++) netGraph.fit(new INDArray[]{features}, new INDArray[]{labels}); netGraph.computeGradientAndScore(); - double scoreAfter = netGraph.score(); + double scoreAfter = netGraph.getScore(); //Can't test in 'characteristic mode of operation' if not learning msg = "elementWiseMultiplicationLayerTest() - score did not (sufficiently) decrease during learning - activationFn=" + "Id" + ", lossFn=" + "Cos-sim" + ", outputActivation=" + "Id" @@ -757,11 +757,11 @@ public class GradientCheckTests extends BaseDL4JTest { mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); - double scoreBefore = mln.score(); + double scoreBefore = mln.getScore(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); - 
double scoreAfter = mln.score(); + double scoreAfter = mln.getScore(); //Can't test in 'characteristic mode of operation' if not learning String msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", layerNorm=" + layerNorm + ", outputActivation=" + outputActivation diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java index 0cf7ebd1b..6197f73d3 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java @@ -666,7 +666,7 @@ public class LossFunctionGradientCheck extends BaseDL4JTest { net.init(); //Check params to avoid test flakiness on small or large params - INDArray params = net.params(); + INDArray params = net.getModelParams(); for( int x=0; x 1.5){ double d = Nd4j.getRandom().nextDouble(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java index 700b70a6b..eead1511f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java @@ -37,10 +37,9 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer; @@ -254,8 +253,8 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { MultiLayerNetwork model2 = new MultiLayerNetwork(getConf()); model2.init(); - float[] p1 = model1.params().data().asFloat(); - float[] p2 = model2.params().data().asFloat(); + float[] p1 = model1.getModelParams().data().asFloat(); + float[] p2 = model2.getModelParams().data().asFloat(); System.out.println(Arrays.toString(p1)); System.out.println(Arrays.toString(p2)); @@ -266,20 +265,20 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { public void testTrainingListener() { MultiLayerNetwork model1 = new MultiLayerNetwork(getConf()); model1.init(); - model1.addListeners(new ScoreIterationListener(1)); + model1.addTrainingListeners(new ScoreIterationListener(1)); MultiLayerNetwork model2 = new MultiLayerNetwork(getConf()); - model2.addListeners(new ScoreIterationListener(1)); + model2.addTrainingListeners(new ScoreIterationListener(1)); model2.init(); Layer[] l1 = model1.getLayers(); for (int i = 0; i < l1.length; i++) { - assertTrue(l1[i].getListeners() != null && l1[i].getListeners().size() == 1); + assertTrue(l1[i].getTrainingListeners() != null && l1[i].getTrainingListeners().size() 
== 1); } Layer[] l2 = model2.getLayers(); for (int i = 0; i < l2.length; i++) { - assertTrue(l2[i].getListeners() != null && l2[i].getListeners().size() == 1); + assertTrue(l2[i].getTrainingListeners() != null && l2[i].getTrainingListeners().size() == 1); } } @@ -384,10 +383,10 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 1)).build(); - org.deeplearning4j.nn.conf.layers.BaseLayer l0 = (BaseLayer) conf.getConf(0).getLayer(); - org.deeplearning4j.nn.conf.layers.BaseLayer l1 = (BaseLayer) conf.getConf(1).getLayer(); - org.deeplearning4j.nn.conf.layers.BaseLayer l2 = (BaseLayer) conf.getConf(2).getLayer(); - org.deeplearning4j.nn.conf.layers.BaseLayer l3 = (BaseLayer) conf.getConf(3).getLayer(); + BaseLayerConfiguration l0 = (BaseLayerConfiguration) conf.getConf(0).getLayer(); + BaseLayerConfiguration l1 = (BaseLayerConfiguration) conf.getConf(1).getLayer(); + BaseLayerConfiguration l2 = (BaseLayerConfiguration) conf.getConf(2).getLayer(); + BaseLayerConfiguration l3 = (BaseLayerConfiguration) conf.getConf(3).getLayer(); assertEquals(0.5, ((Adam) l0.getUpdaterByParam("b")).getLearningRate(), 1e-6); assertEquals(1e-2, ((Adam) l0.getUpdaterByParam("W")).getLearningRate(), 1e-6); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java index 6a7ec6408..0ef220c25 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java @@ -25,7 +25,7 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.BatchNormalization; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -100,7 +100,7 @@ public class NeuralNetConfigurationTest extends BaseDL4JTest { @Test public void testClone() { NeuralNetConfiguration conf = getConfig(1, 1, new WeightInitUniform(), true); - BaseLayer bl = (BaseLayer) conf.getFlattenedLayerConfigurations().get(0); + BaseLayerConfiguration bl = (BaseLayerConfiguration) conf.getFlattenedLayerConfigurations().get(0); conf.setStepFunction(new DefaultStepFunction()); NeuralNetConfiguration conf2 = conf.clone(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java index 9cf99a89c..be78b1ecf 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java @@ -158,7 +158,7 @@ public class ShiftVertexTest extends BaseDL4JTest { cg.setInput(0, input); cg.setLabel(0, target); cg.computeGradientAndScore(); - double score_dl4j = cg.score(); + double score_dl4j = cg.getScore(); Map weights = cg.getParamTable(); Gradient g = cg.gradient(); Map gradients = g.gradientForVariable(); diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java index db3731f6d..28d17c150 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java @@ -72,8 +72,8 @@ public class LayerConfigTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals("relu", ((BaseLayer) conf.getConf(0).getLayer()).getActivationFn().toString()); - assertEquals("relu", ((BaseLayer) conf.getConf(1).getLayer()).getActivationFn().toString()); + assertEquals("relu", ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getActivationFn().toString()); + assertEquals("relu", ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getActivationFn().toString()); //With conf = NeuralNetConfiguration.builder().activation(Activation.RELU) @@ -83,8 +83,8 @@ public class LayerConfigTest extends BaseDL4JTest { net = new MultiLayerNetwork(conf); net.init(); - assertEquals("relu", ((BaseLayer) conf.getConf(0).getLayer()).getActivationFn().toString()); - assertEquals("tanh", ((BaseLayer) conf.getConf(1).getLayer()).getActivationFn().toString()); + assertEquals("relu", ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getActivationFn().toString()); + assertEquals("tanh", ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getActivationFn().toString()); } @@ -99,11 +99,11 @@ public class LayerConfigTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayer) conf.getConf(0).getLayer()).getWeightInitFn()); - assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayer) conf.getConf(1).getLayer()).getWeightInitFn()); + assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getWeightInitFn()); + assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getWeightInitFn()); - assertEquals(1, ((BaseLayer) conf.getConf(0).getLayer()).getBiasInit(), 0.0); - assertEquals(1, ((BaseLayer) conf.getConf(1).getLayer()).getBiasInit(), 0.0); + assertEquals(1, ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getBiasInit(), 0.0); + assertEquals(1, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getBiasInit(), 0.0); //With: final Distribution overriddenDistribution = new UniformDistribution(0, 1); @@ -117,11 +117,11 @@ public class LayerConfigTest extends BaseDL4JTest { net = new MultiLayerNetwork(conf); net.init(); - assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayer) conf.getConf(0).getLayer()).getWeightInitFn()); - assertEquals(new WeightInitDistribution(overriddenDistribution), ((BaseLayer) conf.getConf(1).getLayer()).getWeightInitFn()); + assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getWeightInitFn()); + assertEquals(new WeightInitDistribution(overriddenDistribution), ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getWeightInitFn()); - assertEquals(1, ((BaseLayer) conf.getConf(0).getLayer()).getBiasInit(), 0.0); - assertEquals(0, ((BaseLayer) conf.getConf(1).getLayer()).getBiasInit(), 0.0); + assertEquals(1, ((BaseLayerConfiguration) 
conf.getConf(0).getLayer()).getBiasInit(), 0.0); + assertEquals(0, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getBiasInit(), 0.0); } /* @@ -137,8 +137,8 @@ public class LayerConfigTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(0.3, ((BaseLayer) conf.getConf(0).getLayer()).getLearningRate(), 0.0); - assertEquals(0.3, ((BaseLayer) conf.getConf(1).getLayer()).getLearningRate(), 0.0); + assertEquals(0.3, ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getLearningRate(), 0.0); + assertEquals(0.3, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getLearningRate(), 0.0); //With: conf = NeuralNetConfiguration.builder().learningRate(0.3) @@ -148,8 +148,8 @@ public class LayerConfigTest extends BaseDL4JTest { net = new MultiLayerNetwork(conf); net.init(); - assertEquals(0.3, ((BaseLayer) conf.getConf(0).getLayer()).getLearningRate(), 0.0); - assertEquals(0.2, ((BaseLayer) conf.getConf(1).getLayer()).getLearningRate(), 0.0); + assertEquals(0.3, ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getLearningRate(), 0.0); + assertEquals(0.2, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getLearningRate(), 0.0); //L1 and L2 without layerwise override: conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.2) @@ -158,10 +158,10 @@ public class LayerConfigTest extends BaseDL4JTest { net = new MultiLayerNetwork(conf); net.init(); - assertEquals(0.1, ((BaseLayer) conf.getConf(0).getLayer()).getL1(), 0.0); - assertEquals(0.1, ((BaseLayer) conf.getConf(1).getLayer()).getL1(), 0.0); - assertEquals(0.2, ((BaseLayer) conf.getConf(0).getLayer()).getL2(), 0.0); - assertEquals(0.2, ((BaseLayer) conf.getConf(1).getLayer()).getL2(), 0.0); + assertEquals(0.1, ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getL1(), 0.0); + assertEquals(0.1, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getL1(), 0.0); + assertEquals(0.2, ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getL2(), 0.0); + assertEquals(0.2, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getL2(), 0.0); //L1 and L2 with layerwise override: conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.2) @@ -170,10 +170,10 @@ public class LayerConfigTest extends BaseDL4JTest { net = new MultiLayerNetwork(conf); net.init(); - assertEquals(0.9, ((BaseLayer) conf.getConf(0).getLayer()).getL1(), 0.0); - assertEquals(0.1, ((BaseLayer) conf.getConf(1).getLayer()).getL1(), 0.0); - assertEquals(0.2, ((BaseLayer) conf.getConf(0).getLayer()).getL2(), 0.0); - assertEquals(0.8, ((BaseLayer) conf.getConf(1).getLayer()).getL2(), 0.0); + assertEquals(0.9, ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getL1(), 0.0); + assertEquals(0.1, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getL1(), 0.0); + assertEquals(0.2, ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getL2(), 0.0); + assertEquals(0.8, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getL2(), 0.0); }*/ @@ -213,8 +213,8 @@ public class LayerConfigTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(0.1, ((Nesterovs)((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); - assertEquals(0.1, ((Nesterovs)((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); + assertEquals(0.1, ((Nesterovs)((BaseLayerConfiguration) conf.getConf(0).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); + assertEquals(0.1, 
((Nesterovs)((BaseLayerConfiguration) conf.getConf(1).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); Map testMomentumAfter2 = new HashMap<>(); testMomentumAfter2.put(0, 0.2); @@ -227,8 +227,8 @@ public class LayerConfigTest extends BaseDL4JTest { net = new MultiLayerNetwork(conf); net.init(); - assertEquals(0.1, ((Nesterovs)((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); - assertEquals(0.2, ((Nesterovs)((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); + assertEquals(0.1, ((Nesterovs)((BaseLayerConfiguration) conf.getConf(0).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); + assertEquals(0.2, ((Nesterovs)((BaseLayerConfiguration) conf.getConf(1).getLayer()).getIUpdater()).getMomentumISchedule().valueAt(0,0), 0.0); } @Test @@ -239,10 +239,10 @@ public class LayerConfigTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertTrue(((BaseLayer) conf.getConf(0).getLayer()).getIUpdater() instanceof AdaDelta); - assertTrue(((BaseLayer) conf.getConf(1).getLayer()).getIUpdater() instanceof AdaDelta); - assertEquals(0.5, ((AdaDelta)((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getRho(), 0.0); - assertEquals(0.01, ((AdaDelta)((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getRho(), 0.0); + assertTrue(((BaseLayerConfiguration) conf.getConf(0).getLayer()).getIUpdater() instanceof AdaDelta); + assertTrue(((BaseLayerConfiguration) conf.getConf(1).getLayer()).getIUpdater() instanceof AdaDelta); + assertEquals(0.5, ((AdaDelta)((BaseLayerConfiguration) conf.getConf(0).getLayer()).getIUpdater()).getRho(), 0.0); + assertEquals(0.01, ((AdaDelta)((BaseLayerConfiguration) conf.getConf(1).getLayer()).getIUpdater()).getRho(), 0.0); conf = NeuralNetConfiguration.builder().updater(new RmsProp(1.0, 2.0, RmsProp.DEFAULT_RMSPROP_EPSILON)) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).updater(new RmsProp(1.0, 1.0, RmsProp.DEFAULT_RMSPROP_EPSILON)).build()) @@ -252,10 +252,10 @@ public class LayerConfigTest extends BaseDL4JTest { net = new MultiLayerNetwork(conf); net.init(); - assertTrue(((BaseLayer) conf.getConf(0).getLayer()).getIUpdater() instanceof RmsProp); - assertTrue(((BaseLayer) conf.getConf(1).getLayer()).getIUpdater() instanceof AdaDelta); - assertEquals(1.0, ((RmsProp) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getRmsDecay(), 0.0); - assertEquals(0.5, ((AdaDelta) ((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getRho(), 0.0); + assertTrue(((BaseLayerConfiguration) conf.getConf(0).getLayer()).getIUpdater() instanceof RmsProp); + assertTrue(((BaseLayerConfiguration) conf.getConf(1).getLayer()).getIUpdater() instanceof AdaDelta); + assertEquals(1.0, ((RmsProp) ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getIUpdater()).getRmsDecay(), 0.0); + assertEquals(0.5, ((AdaDelta) ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getIUpdater()).getRho(), 0.0); } @@ -270,10 +270,10 @@ public class LayerConfigTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(0.5, ((Adam) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getBeta1(), 0.0); - assertEquals(0.6, ((Adam) ((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getBeta1(), 0.0); - assertEquals(0.5, ((Adam) ((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getBeta2(), 0.0); - assertEquals(0.7, ((Adam) ((BaseLayer) 
conf.getConf(1).getLayer()).getIUpdater()).getBeta2(), 0.0); + assertEquals(0.5, ((Adam) ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getIUpdater()).getBeta1(), 0.0); + assertEquals(0.6, ((Adam) ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getIUpdater()).getBeta1(), 0.0); + assertEquals(0.5, ((Adam) ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getIUpdater()).getBeta2(), 0.0); + assertEquals(0.7, ((Adam) ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getIUpdater()).getBeta2(), 0.0); } @Test @@ -287,13 +287,11 @@ public class LayerConfigTest extends BaseDL4JTest { .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - - assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, - conf.getConf(0).getLayer().getGradientNormalization()); - assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, - conf.getConf(1).getLayer().getGradientNormalization()); - assertEquals(10, conf.getConf(0).getLayer().getGradientNormalizationThreshold(), 0.0); - assertEquals(10, conf.getConf(1).getLayer().getGradientNormalizationThreshold(), 0.0); + BaseLayerConfiguration bconf = (BaseLayerConfiguration) conf.getConf(0).getLayer(); + assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, bconf.getGradientNormalization()); + assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getGradientNormalization()); + assertEquals(10, bconf.getGradientNormalizationThreshold(), 0.0); + assertEquals(10, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getGradientNormalizationThreshold(), 0.0); //With: conf = NeuralNetConfiguration.builder() @@ -308,11 +306,10 @@ public class LayerConfigTest extends BaseDL4JTest { net = new MultiLayerNetwork(conf); net.init(); - assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, - conf.getConf(0).getLayer().getGradientNormalization()); - assertEquals(GradientNormalization.None, conf.getConf(1).getLayer().getGradientNormalization()); - assertEquals(10, conf.getConf(0).getLayer().getGradientNormalizationThreshold(), 0.0); - assertEquals(2.5, conf.getConf(1).getLayer().getGradientNormalizationThreshold(), 0.0); + assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getGradientNormalization()); + assertEquals(GradientNormalization.None, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getGradientNormalization()); + assertEquals(10, ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getGradientNormalizationThreshold(), 0.0); + assertEquals(2.5, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getGradientNormalizationThreshold(), 0.0); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java index 65532a0bc..dae839a06 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java @@ -162,12 +162,12 @@ public class LayerConfigValidationTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - BaseLayer layerConf = (BaseLayer) net.getLayer(0).getLayerConfiguration(); + BaseLayerConfiguration layerConf = (BaseLayerConfiguration) net.getLayer(0).getLayerConfiguration(); assertEquals(expectedMomentum, ((Nesterovs) layerConf.getIUpdater()).getMomentum(), 1e-3); assertNull(TestUtils.getL1Reg(layerConf.getRegularization())); assertEquals(0.5, TestUtils.getL2(layerConf), 1e-3); - BaseLayer layerConf1 = (BaseLayer)
net.getLayer(1).getLayerConfiguration(); + BaseLayerConfiguration layerConf1 = (BaseLayerConfiguration) net.getLayer(1).getLayerConfiguration(); assertEquals(0.4, ((Nesterovs) layerConf1.getIUpdater()).getMomentum(), 1e-3); // Adam Updater @@ -178,11 +178,11 @@ public class LayerConfigValidationTest extends BaseDL4JTest { net = new MultiLayerNetwork(conf); net.init(); - layerConf = (BaseLayer) net.getLayer(0).getLayerConfiguration(); + layerConf = (BaseLayerConfiguration) net.getLayer(0).getLayerConfiguration(); assertEquals(0.3, TestUtils.getL1(layerConf), 1e-3); assertEquals(0.5, TestUtils.getL2(layerConf), 1e-3); - layerConf1 = (BaseLayer) net.getLayer(1).getLayerConfiguration(); + layerConf1 = (BaseLayerConfiguration) net.getLayer(1).getLayerConfiguration(); assertEquals(expectedAdamMeanDecay, ((Adam) layerConf1.getIUpdater()).getBeta1(), 1e-3); assertEquals(expectedAdamVarDecay, ((Adam) layerConf1.getIUpdater()).getBeta2(), 1e-3); assertEquals(new WeightInitDistribution(expectedDist), layerConf1.getWeightInitFn()); @@ -196,12 +196,12 @@ public class LayerConfigValidationTest extends BaseDL4JTest { net = new MultiLayerNetwork(conf); net.init(); - layerConf = (BaseLayer) net.getLayer(0).getLayerConfiguration(); + layerConf = (BaseLayerConfiguration) net.getLayer(0).getLayerConfiguration(); assertEquals(expectedRmsDecay, ((RmsProp) layerConf.getIUpdater()).getRmsDecay(), 1e-3); assertNull(TestUtils.getL1Reg(layerConf.getRegularization())); assertNull(TestUtils.getL2Reg(layerConf.getRegularization())); - layerConf1 = (BaseLayer) net.getLayer(1).getLayerConfiguration(); + layerConf1 = (BaseLayerConfiguration) net.getLayer(1).getLayerConfiguration(); assertEquals(0.4, ((RmsProp) layerConf1.getIUpdater()).getRmsDecay(), 1e-3); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java index 4d4b36013..8977d1b3f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java @@ -29,7 +29,7 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -75,9 +75,9 @@ public class TestWeightNoise extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(wn, ((BaseLayer) net.getLayer(0).getLayerConfiguration()).getWeightNoise()); - assertEquals(new DropConnect(0.25), ((BaseLayer) net.getLayer(1).getLayerConfiguration()).getWeightNoise()); - assertEquals(wn, ((BaseLayer) net.getLayer(2).getLayerConfiguration()).getWeightNoise()); + assertEquals(wn, ((BaseLayerConfiguration) net.getLayer(0).getLayerConfiguration()).getWeightNoise()); + assertEquals(new DropConnect(0.25), ((BaseLayerConfiguration) net.getLayer(1).getLayerConfiguration()).getWeightNoise()); + assertEquals(wn, ((BaseLayerConfiguration) net.getLayer(2).getLayerConfiguration()).getWeightNoise()); TestUtils.testModelSerialization(net); @@ -95,9 
+95,9 @@ public class TestWeightNoise extends BaseDL4JTest { ComputationGraph graph = new ComputationGraph(conf2); graph.init(); - assertEquals(wn, ((BaseLayer) graph.getLayer(0).getLayerConfiguration()).getWeightNoise()); - assertEquals(new DropConnect(0.25), ((BaseLayer) graph.getLayer(1).getLayerConfiguration()).getWeightNoise()); - assertEquals(wn, ((BaseLayer) graph.getLayer(2).getLayerConfiguration()).getWeightNoise()); + assertEquals(wn, ((BaseLayerConfiguration) graph.getLayer(0).getLayerConfiguration()).getWeightNoise()); + assertEquals(new DropConnect(0.25), ((BaseLayerConfiguration) graph.getLayer(1).getLayerConfiguration()).getWeightNoise()); + assertEquals(wn, ((BaseLayerConfiguration) graph.getLayer(2).getLayerConfiguration()).getWeightNoise()); TestUtils.testModelSerialization(graph); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java index e37b7b7cb..2f2a316dd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java @@ -124,7 +124,7 @@ import org.deeplearning4j.nn.conf.layers.recurrent.TimeDistributed; import org.deeplearning4j.nn.conf.layers.util.MaskLayer; import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer; import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder; -import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; +import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayerConfiguration; import org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.CnnToRnnPreProcessor; @@ -260,8 +260,8 @@ public class DTypeTests extends BaseDL4JTest { for (NeuralNetConfiguration nnc : conf.getNetConfigurations()) { LayerConfiguration l = nnc.getFlattenedLayerConfigurations().get(0); seenLayers.add(l.getClass()); - if (l instanceof BaseWrapperLayer) { - BaseWrapperLayer bwl = (BaseWrapperLayer) l; + if (l instanceof BaseWrapperLayerConfiguration) { + BaseWrapperLayerConfiguration bwl = (BaseWrapperLayerConfiguration) l; seenLayers.add(bwl.getUnderlying().getClass()); } else if (l instanceof Bidirectional) { seenLayers.add(((Bidirectional) l).getFwd().getClass()); @@ -321,17 +321,17 @@ public class DTypeTests extends BaseDL4JTest { net.setInput(inD); net.setLabels(lD); net.computeGradientAndScore(); - double scoreDouble = net.score(); + double scoreDouble = net.getScore(); INDArray grads = net.getFlattenedGradients(); INDArray u = net.getUpdater().getStateViewArray(); - assertEquals(DataType.DOUBLE, net.params().dataType()); + assertEquals(DataType.DOUBLE, net.getModelParams().dataType()); assertEquals(DataType.DOUBLE, grads.dataType()); assertEquals(DataType.DOUBLE, u.dataType()); MultiLayerNetwork netFloat = net.convertDataType(DataType.FLOAT); netFloat.initGradientsView(); - assertEquals(DataType.FLOAT, netFloat.params().dataType()); + assertEquals(DataType.FLOAT, netFloat.getModelParams().dataType()); assertEquals(DataType.FLOAT, netFloat.getFlattenedGradients().dataType()); assertEquals(DataType.FLOAT, netFloat.getUpdater(true).getStateViewArray().dataType()); INDArray inF = inD.castTo(DataType.FLOAT); @@ -340,7 +340,7 @@ public class DTypeTests extends BaseDL4JTest { netFloat.setInput(inF); netFloat.setLabels(lF); netFloat.computeGradientAndScore(); - double 
scoreFloat = netFloat.score(); + double scoreFloat = netFloat.getScore(); INDArray gradsFloat = netFloat.getFlattenedGradients(); INDArray uFloat = netFloat.getUpdater().getStateViewArray(); @@ -352,7 +352,7 @@ public class DTypeTests extends BaseDL4JTest { MultiLayerNetwork netFP16 = net.convertDataType(DataType.HALF); netFP16.initGradientsView(); - assertEquals(DataType.HALF, netFP16.params().dataType()); + assertEquals(DataType.HALF, netFP16.getModelParams().dataType()); assertEquals(DataType.HALF, netFP16.getFlattenedGradients().dataType()); assertEquals(DataType.HALF, netFP16.getUpdater(true).getStateViewArray().dataType()); @@ -362,7 +362,7 @@ public class DTypeTests extends BaseDL4JTest { netFP16.setInput(inH); netFP16.setLabels(lH); netFP16.computeGradientAndScore(); - double scoreHalf = netFP16.score(); + double scoreHalf = netFP16.getScore(); INDArray gradsHalf = netFP16.getFlattenedGradients(); INDArray uHalf = netFP16.getUpdater().getStateViewArray(); @@ -406,17 +406,17 @@ public class DTypeTests extends BaseDL4JTest { net.setInput(0, inD); net.setLabels(lD); net.computeGradientAndScore(); - double scoreDouble = net.score(); + double scoreDouble = net.getScore(); INDArray grads = net.getFlattenedGradients(); INDArray u = net.getUpdater().getStateViewArray(); - assertEquals(DataType.DOUBLE, net.params().dataType()); + assertEquals(DataType.DOUBLE, net.getModelParams().dataType()); assertEquals(DataType.DOUBLE, grads.dataType()); assertEquals(DataType.DOUBLE, u.dataType()); ComputationGraph netFloat = net.convertDataType(DataType.FLOAT); netFloat.initGradientsView(); - assertEquals(DataType.FLOAT, netFloat.params().dataType()); + assertEquals(DataType.FLOAT, netFloat.getModelParams().dataType()); assertEquals(DataType.FLOAT, netFloat.getFlattenedGradients().dataType()); assertEquals(DataType.FLOAT, netFloat.getUpdater(true).getStateViewArray().dataType()); INDArray inF = inD.castTo(DataType.FLOAT); @@ -425,7 +425,7 @@ public class DTypeTests extends BaseDL4JTest { netFloat.setInput(0, inF); netFloat.setLabels(lF); netFloat.computeGradientAndScore(); - double scoreFloat = netFloat.score(); + double scoreFloat = netFloat.getScore(); INDArray gradsFloat = netFloat.getFlattenedGradients(); INDArray uFloat = netFloat.getUpdater().getStateViewArray(); @@ -437,7 +437,7 @@ public class DTypeTests extends BaseDL4JTest { ComputationGraph netFP16 = net.convertDataType(DataType.HALF); netFP16.initGradientsView(); - assertEquals(DataType.HALF, netFP16.params().dataType()); + assertEquals(DataType.HALF, netFP16.getModelParams().dataType()); assertEquals(DataType.HALF, netFP16.getFlattenedGradients().dataType()); assertEquals(DataType.HALF, netFP16.getUpdater(true).getStateViewArray().dataType()); @@ -447,7 +447,7 @@ public class DTypeTests extends BaseDL4JTest { netFP16.setInput(0, inH); netFP16.setLabels(lH); netFP16.computeGradientAndScore(); - double scoreHalf = netFP16.score(); + double scoreHalf = netFP16.getScore(); INDArray gradsHalf = netFP16.getFlattenedGradients(); INDArray uHalf = netFP16.getUpdater().getStateViewArray(); @@ -536,7 +536,7 @@ public class DTypeTests extends BaseDL4JTest { net.init(); net.initGradientsView(); - assertEquals(networkDtype, net.params().dataType(), msg); + assertEquals(networkDtype, net.getModelParams().dataType(), msg); assertEquals(networkDtype, net.getFlattenedGradients().dataType(), msg); assertEquals(networkDtype, net.getUpdater(true).getStateViewArray().dataType(), msg); @@ -641,7 +641,7 @@ public class DTypeTests extends BaseDL4JTest { net.init(); 
net.initGradientsView(); - assertEquals(networkDtype, net.params().dataType(), msg); + assertEquals(networkDtype, net.getModelParams().dataType(), msg); assertEquals(networkDtype, net.getFlattenedGradients().dataType(), msg); assertEquals(networkDtype, net.getUpdater(true).getStateViewArray().dataType(), msg); @@ -754,7 +754,7 @@ public class DTypeTests extends BaseDL4JTest { net.init(); net.initGradientsView(); - assertEquals(networkDtype, net.params().dataType(), msg); + assertEquals(networkDtype, net.getModelParams().dataType(), msg); assertEquals(networkDtype, net.getFlattenedGradients().dataType(), msg); assertEquals(networkDtype, net.getUpdater(true).getStateViewArray().dataType(), msg); @@ -827,7 +827,7 @@ public class DTypeTests extends BaseDL4JTest { net.init(); net.initGradientsView(); - assertEquals(networkDtype, net.params().dataType(), msg); + assertEquals(networkDtype, net.getModelParams().dataType(), msg); assertEquals(networkDtype, net.getFlattenedGradients().dataType(), msg); assertEquals(networkDtype, net.getUpdater(true).getStateViewArray().dataType(), msg); @@ -916,7 +916,7 @@ public class DTypeTests extends BaseDL4JTest { net.init(); net.initGradientsView(); - assertEquals(networkDtype, net.params().dataType(), msg); + assertEquals(networkDtype, net.getModelParams().dataType(), msg); assertEquals(networkDtype, net.getFlattenedGradients().dataType(), msg); assertEquals(networkDtype, net.getUpdater(true).getStateViewArray().dataType(), msg); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java index de8c16075..4197263b6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java @@ -520,9 +520,9 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { INDArray inputLong = Nd4j.rand(miniBatchSize, nIn, timeSeriesLength); INDArray labelsLong = Nd4j.rand(miniBatchSize, nOut, timeSeriesLength); - INDArray initialParams = graph.params().dup(); + INDArray initialParams = graph.getModelParams().dup(); graph.fit(new INDArray[] {inputLong}, new INDArray[] {labelsLong}); - INDArray afterParams = graph.params(); + INDArray afterParams = graph.getModelParams(); assertNotEquals(initialParams, afterParams); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java index d83f4ac17..4129592b6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java @@ -117,7 +117,7 @@ public class TestCompGraphCNN extends BaseDL4JTest { boolean orderOK = Arrays.equals(expOrder1, order) || Arrays.equals(expOrder2, order); assertTrue(orderOK); - INDArray params = graph.params(); + INDArray params = graph.getModelParams(); assertNotNull(params); // confirm param shape is what is expected @@ -129,7 +129,7 @@ public class TestCompGraphCNN extends BaseDL4JTest { // params are set graph.setParams(arr); - params = graph.params(); + params = graph.getModelParams(); assertEquals(arr, params); //Number of inputs and outputs: diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java index b24dc76ed..2cf9e0db4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java @@ -108,7 +108,7 @@ public class TestCompGraphUnsupervised extends BaseDL4JTest { } } - int count = Nd4j.getExecutioner().exec(new MatchCondition(cg.params(), Conditions.isNan())).getInt(0); + int count = Nd4j.getExecutioner().exec(new MatchCondition(cg.getModelParams(), Conditions.isNan())).getInt(0); assertEquals(0, count); @@ -125,7 +125,7 @@ public class TestCompGraphUnsupervised extends BaseDL4JTest { } } - count = Nd4j.getExecutioner().exec(new MatchCondition(cg.params(), Conditions.isNan())).getInt(0); + count = Nd4j.getExecutioner().exec(new MatchCondition(cg.getModelParams(), Conditions.isNan())).getInt(0); assertEquals(0, count); } } @@ -176,7 +176,7 @@ public class TestCompGraphUnsupervised extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); cg.pretrainLayer("0", ds); - assertEquals(net.params(), cg.params()); + assertEquals(net.getModelParams(), cg.getModelParams()); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java index 7feb29ddb..46180da6d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java @@ -159,7 +159,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { DataSet ds = iris.next(); graph.setInput(0, ds.getFeatures()); - net.setParams(graph.params()); + net.setParams(graph.getModelParams()); Map activations = graph.feedForward(false); List feedForward = net.feedForward(ds.getFeatures()); @@ -184,7 +184,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { int[] expOrder = new int[]{0, 1, 2}; assertArrayEquals(expOrder, order); //Only one valid order: 0 (input) -> 1 (firstlayer) -> 2 (outputlayer) - INDArray params = graph.params(); + INDArray params = graph.getModelParams(); assertNotNull(params); int nParams = getNumParams(); @@ -194,7 +194,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { assertEquals(nParams, arr.length()); graph.setParams(arr); - params = graph.params(); + params = graph.getModelParams(); assertEquals(arr, params); //Number of inputs and outputs: @@ -315,8 +315,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { graph.fit(iris); //Check that parameters are equal for both models after fitting: - INDArray paramsMLN = net.params(); - INDArray paramsGraph = graph.params(); + INDArray paramsMLN = net.getModelParams(); + INDArray paramsGraph = graph.getModelParams(); assertNotEquals(params, paramsGraph); assertEquals(paramsMLN, paramsGraph); @@ -636,7 +636,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraph net = new ComputationGraph(conf); net.init(); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator iter = new IrisDataSetIterator(10, 150); net.pretrain(iter); @@ -675,7 +675,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraph netNoReg = new ComputationGraph(confNoReg); 
netNoReg.init(); - netNoReg.setParams(net.params().dup()); + netNoReg.setParams(net.getModelParams().dup()); //Score single example, and compare to scoreExamples: INDArray input = Nd4j.rand(3, nIn); @@ -878,13 +878,13 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { net.setParam("first_b", Nd4j.ones(1, 5)); net.setParam("output_W", Nd4j.ones(5, 3)); net.setParam("output_b", Nd4j.ones(1, 3)); - INDArray actualParams = net.params(); + INDArray actualParams = net.getModelParams(); // Confirm params assertEquals(Nd4j.ones(1, 43), actualParams); net.update(expectedGradient); - actualParams = net.params(); + actualParams = net.getModelParams(); assertEquals(Nd4j.ones(1, 43).addi(1), actualParams); } @@ -1638,7 +1638,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { conf3.setTopologicalOrderStr(null); ComputationGraph cg3 = new ComputationGraph(conf3); cg3.init(); - cg3.setParams(cg2.params()); + cg3.setParams(cg2.getModelParams()); int[] order3 = cg3.topologicalSortOrder(); List strOrder3 = cg.getComputationGraphConfiguration().getTopologicalOrderStr(); @@ -1712,7 +1712,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { exp.add(ComputationGraph.class); MultiLayerTest.CheckModelsListener listener = new MultiLayerTest.CheckModelsListener(); - net.setListeners(listener); + net.addTrainingListeners(listener); INDArray f = Nd4j.create(1,10); INDArray l = Nd4j.create(1,10); @@ -1874,7 +1874,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraph cg = new ComputationGraph(conf); cg.init(); - cg.params().assign(Nd4j.linspace(1, 220, 220).reshape(1, -11)); + cg.getModelParams().assign(Nd4j.linspace(1, 220, 220).reshape(1, -11)); INDArray p0w = cg.getParam("layer_zero_W"); assertEquals(Nd4j.linspace(1, 100, 100).reshape('f', 10, 10), p0w); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java index 2f752b316..685920d10 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java @@ -56,7 +56,7 @@ public class TestSetGetParameters extends BaseDL4JTest { ComputationGraph net = new ComputationGraph(conf); net.init(); - INDArray params = net.params(); + INDArray params = net.getModelParams(); ComputationGraph net2 = new ComputationGraph(conf); @@ -65,11 +65,11 @@ public class TestSetGetParameters extends BaseDL4JTest { ComputationGraph net3 = new ComputationGraph(conf); net3.init(params, false); - assertEquals(params, net2.params()); - assertEquals(params, net3.params()); + assertEquals(params, net2.getModelParams()); + assertEquals(params, net3.getModelParams()); - assertNotSame(params, net2.params()); //Different objects due to clone - assertSame(params, net3.params()); //Same object due to clone + assertNotSame(params, net2.getModelParams()); //Different objects due to clone + assertSame(params, net3.getModelParams()); //Same object due to clone Map paramsMap = net.getParamTable(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java index 237e7550e..7023e0039 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java @@ -103,14 +103,14 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { net.setInput(0, in1); net.setLabel(0, labels1); net.computeGradientAndScore(); - double score1 = net.score(); + double score1 = net.getScore(); Gradient g1 = net.gradient(); net.setInput(0, in2); net.setLabel(0, labels2); net.setLayerMaskArrays(null, new INDArray[] {labelMask}); net.computeGradientAndScore(); - double score2 = net.score(); + double score2 = net.getScore(); Gradient g2 = net.gradient(); //Scores and gradients should be identical for two cases (given mask array) @@ -134,7 +134,7 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { } net.setLabel(0, labels2); net.computeGradientAndScore(); - double score2a = net.score(); + double score2a = net.getScore(); Gradient g2a = net.gradient(); assertEquals(score2, score2a, 1e-6); for (String s : g2map.keySet()) { @@ -200,7 +200,7 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { net.setInput(0, in1); net.setLabel(0, labels1); net.computeGradientAndScore(); - double score1 = net.score(); + double score1 = net.getScore(); Gradient g1 = net.gradient(); Map map = g1.gradientForVariable(); for (String s : map.keySet()) { @@ -211,7 +211,7 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { net.setLabel(0, labels2); net.setLayerMaskArrays(new INDArray[] {inputMask}, null); net.computeGradientAndScore(); - double score2 = net.score(); + double score2 = net.getScore(); Gradient g2 = net.gradient(); Map activations2 = net.feedForward(); @@ -236,7 +236,7 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { net.setInput(0, in2); net.setLayerMaskArrays(new INDArray[]{inputMask}, null); net.computeGradientAndScore(); - double score2a = net.score(); + double score2a = net.getScore(); Gradient g2a = net.gradient(); assertEquals(score2, score2a, 1e-12); for (String s : g2.gradientForVariable().keySet()) { @@ -330,7 +330,7 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { net.setLabel(0, labels); net.computeGradientAndScore(); - double score = net.score(); + double score = net.getScore(); assertEquals(expScore, score, 0.1, msg); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerConfigurationTest.java similarity index 98% rename from cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerTest.java rename to cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerConfigurationTest.java index 189467ab4..c481d20df 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerConfigurationTest.java @@ -40,7 +40,7 @@ import java.util.Map; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; -public class BaseLayerTest extends BaseDL4JTest { +public class BaseLayerConfigurationTest extends BaseDL4JTest { protected INDArray weight = Nd4j.create(new double[] {0.10, -0.20, -0.15, 0.05}, new int[] {2, 2}); protected INDArray bias = Nd4j.create(new double[] {0.5, 0.5}, new int[] {1, 2}); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java 
index 002495133..7898d35ad 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java @@ -56,10 +56,10 @@ public class CacheModeTest extends BaseDL4JTest { INDArray out2 = net2.output(in); assertEquals(out1, out2); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); net1.fit(in, labels); net2.fit(in, labels); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); } private static NeuralNetConfiguration getConf(CacheMode cacheMode){ @@ -99,10 +99,10 @@ public class CacheModeTest extends BaseDL4JTest { INDArray out2 = net2.output(in); assertEquals(out1, out2); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); net1.fit(in, labels); net2.fit(in, labels); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); } } @@ -145,10 +145,10 @@ public class CacheModeTest extends BaseDL4JTest { INDArray out2 = net2.outputSingle(in); assertEquals(out1, out2); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); net1.fit(new DataSet(in, labels)); net2.fit(new DataSet(in, labels)); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); } private static ComputationGraphConfiguration getConfCG(CacheMode cacheMode){ diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java index 9f5597199..84f94928b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java @@ -121,7 +121,7 @@ public class CenterLossOutputLayerTest extends BaseDL4JTest { graph.setInput(0, input); graph.setLabel(0, labels); graph.computeGradientAndScore(); - results[i] = graph.score(); + results[i] = graph.getScore(); } assertNotEquals(results[0], results[1]); @@ -137,7 +137,7 @@ public class CenterLossOutputLayerTest extends BaseDL4JTest { ComputationGraph net = getCNNMnistConfig(); net.init(); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); for (int i = 0; i < 50; i++) { net.fit(mnistTrain.next()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java index 716bbb8a9..80cf35543 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java @@ -265,7 +265,7 @@ public class DropoutLayerTest extends BaseDL4JTest { MultiLayerNetwork netSeparate = new MultiLayerNetwork(confSeparate); netSeparate.init(); - assertEquals(netIntegrated.params(), netSeparate.params()); + assertEquals(netIntegrated.getModelParams(), netSeparate.getModelParams()); Nd4j.getRandom().setSeed(12345); netIntegrated.fit(next); @@ -273,7 +273,7 @@ public class DropoutLayerTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); netSeparate.fit(next); - 
assertEquals(netIntegrated.params(), netSeparate.params()); + assertEquals(netIntegrated.getModelParams(), netSeparate.getModelParams()); // check parameters assertEquals(netIntegrated.getLayer(0).getParam("W"), netSeparate.getLayer(0).getParam("W")); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java index 2b8977ed0..20880d71a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java @@ -80,7 +80,7 @@ public class FrozenLayerTest extends BaseDL4JTest { .setFeatureExtractor(1).build(); INDArray paramsLastTwoLayers = - Nd4j.hstack(modelToFineTune.getLayer(2).params(), modelToFineTune.getLayer(3).params()); + Nd4j.hstack(modelToFineTune.getLayer(2).getParams(), modelToFineTune.getLayer(3).getParams()); MultiLayerNetwork notFrozen = new MultiLayerNetwork( (NeuralNetConfiguration) overallConf.clone() .layer(0, new Builder().nIn(2).nOut(3).build()) @@ -102,9 +102,9 @@ public class FrozenLayerTest extends BaseDL4JTest { modelNow.fit(randomData); } - INDArray expected = Nd4j.hstack(modelToFineTune.getLayer(0).params(), modelToFineTune.getLayer(1).params(), - notFrozen.params()); - INDArray act = modelNow.params(); + INDArray expected = Nd4j.hstack(modelToFineTune.getLayer(0).getParams(), modelToFineTune.getLayer(1).getParams(), + notFrozen.getModelParams()); + INDArray act = modelNow.getModelParams(); assertEquals(expected, act); } @@ -136,7 +136,7 @@ public class FrozenLayerTest extends BaseDL4JTest { assertEquals(modelNow.getNetConfiguration().toJson(), clonedModel.getNetConfiguration().toJson()); //Check params - assertEquals(modelNow.params(), clonedModel.params()); + assertEquals(modelNow.getModelParams(), clonedModel.getModelParams()); MultiLayerNetwork notFrozen = new MultiLayerNetwork( (NeuralNetConfiguration) overallConf.layer(0, new Builder().nIn(2).nOut(3).build()) @@ -145,7 +145,7 @@ public class FrozenLayerTest extends BaseDL4JTest { .activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build(), - Nd4j.hstack(modelToFineTune.getLayer(2).params(), modelToFineTune.getLayer(3).params())); + Nd4j.hstack(modelToFineTune.getLayer(2).getParams(), modelToFineTune.getLayer(3).getParams())); int i = 0; while (i < 5) { @@ -155,10 +155,10 @@ public class FrozenLayerTest extends BaseDL4JTest { i++; } - INDArray expectedParams = Nd4j.hstack(modelToFineTune.getLayer(0).params(), - modelToFineTune.getLayer(1).params(), notFrozen.params()); - assertEquals(expectedParams, modelNow.params()); - assertEquals(expectedParams, clonedModel.params()); + INDArray expectedParams = Nd4j.hstack(modelToFineTune.getLayer(0).getParams(), + modelToFineTune.getLayer(1).getParams(), notFrozen.getModelParams()); + assertEquals(expectedParams, modelNow.getModelParams()); + assertEquals(expectedParams, clonedModel.getModelParams()); } @@ -199,8 +199,8 @@ public class FrozenLayerTest extends BaseDL4JTest { .setOutputs("layer1").build()); notFrozen.init(); - notFrozen.setParams(Nd4j.hstack(modelToFineTune.getLayer("layer2").params(), - modelToFineTune.getLayer("layer3").params())); + notFrozen.setParams(Nd4j.hstack(modelToFineTune.getLayer("layer2").getParams(), + modelToFineTune.getLayer("layer3").getParams())); int i = 0; while (i < 5) { @@ -209,8 +209,8 @@ public class FrozenLayerTest extends BaseDL4JTest { i++; } - 
assertEquals(Nd4j.hstack(modelToFineTune.getLayer("layer0").params(), - modelToFineTune.getLayer("layer1").params(), notFrozen.params()), modelNow.params()); + assertEquals(Nd4j.hstack(modelToFineTune.getLayer("layer0").getParams(), + modelToFineTune.getLayer("layer1").getParams(), notFrozen.getModelParams()), modelNow.getModelParams()); } @Test @@ -244,7 +244,7 @@ public class FrozenLayerTest extends BaseDL4JTest { assertEquals(clonedModel.getComputationGraphConfiguration().toJson(), modelNow.getComputationGraphConfiguration().toJson()); //Check params - assertEquals(modelNow.params(), clonedModel.params()); + assertEquals(modelNow.getModelParams(), clonedModel.getModelParams()); ComputationGraph notFrozen = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") .addLayer("layer0", new DenseLayer.Builder().nIn(2).nOut(3).build(), "layer0In") @@ -256,8 +256,8 @@ public class FrozenLayerTest extends BaseDL4JTest { "layer0") .setOutputs("layer1").build()); notFrozen.init(); - notFrozen.setParams(Nd4j.hstack(modelToFineTune.getLayer("layer2").params(), - modelToFineTune.getLayer("layer3").params())); + notFrozen.setParams(Nd4j.hstack(modelToFineTune.getLayer("layer2").getParams(), + modelToFineTune.getLayer("layer3").getParams())); int i = 0; @@ -268,10 +268,10 @@ public class FrozenLayerTest extends BaseDL4JTest { i++; } - INDArray expectedParams = Nd4j.hstack(modelToFineTune.getLayer("layer0").params(), - modelToFineTune.getLayer("layer1").params(), notFrozen.params()); - assertEquals(expectedParams, modelNow.params()); - assertEquals(expectedParams, clonedModel.params()); + INDArray expectedParams = Nd4j.hstack(modelToFineTune.getLayer("layer0").getParams(), + modelToFineTune.getLayer("layer1").getParams(), notFrozen.getModelParams()); + assertEquals(expectedParams, modelNow.getModelParams()); + assertEquals(expectedParams, clonedModel.getModelParams()); } @@ -305,7 +305,7 @@ public class FrozenLayerTest extends BaseDL4JTest { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); String json = conf2.toJson(); @@ -362,7 +362,7 @@ public class FrozenLayerTest extends BaseDL4JTest { ComputationGraph net2 = new ComputationGraph(conf2); net2.init(); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); String json = conf2.toJson(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java index 89c359ae7..d47973a89 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java @@ -75,7 +75,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); String json = conf2.toJson(); @@ -130,7 +130,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { ComputationGraph net2 = new ComputationGraph(conf2); net2.init(); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); String json = conf2.toJson(); @@ -170,19 +170,19 @@ public class 
FrozenLayerWithBackpropTest extends BaseDL4JTest { MultiLayerNetwork network = new MultiLayerNetwork(conf1); network.init(); - INDArray unfrozenLayerParams = network.getLayer(0).params().dup(); - INDArray frozenLayerParams1 = network.getLayer(1).params().dup(); - INDArray frozenLayerParams2 = network.getLayer(2).params().dup(); - INDArray frozenOutputLayerParams = network.getLayer(3).params().dup(); + INDArray unfrozenLayerParams = network.getLayer(0).getParams().dup(); + INDArray frozenLayerParams1 = network.getLayer(1).getParams().dup(); + INDArray frozenLayerParams2 = network.getLayer(2).getParams().dup(); + INDArray frozenOutputLayerParams = network.getLayer(3).getParams().dup(); for (int i = 0; i < 100; i++) { network.fit(randomData); } - assertNotEquals(unfrozenLayerParams, network.getLayer(0).params()); - assertEquals(frozenLayerParams1, network.getLayer(1).params()); - assertEquals(frozenLayerParams2, network.getLayer(2).params()); - assertEquals(frozenOutputLayerParams, network.getLayer(3).params()); + assertNotEquals(unfrozenLayerParams, network.getLayer(0).getParams()); + assertEquals(frozenLayerParams1, network.getLayer(1).getParams()); + assertEquals(frozenLayerParams2, network.getLayer(2).getParams()); + assertEquals(frozenOutputLayerParams, network.getLayer(3).getParams()); } @@ -228,19 +228,19 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { ComputationGraph computationGraph = new ComputationGraph(computationGraphConf); computationGraph.init(); - INDArray unfrozenLayerParams = computationGraph.getLayer(frozenBranchUnfrozenLayer0).params().dup(); - INDArray frozenLayerParams1 = computationGraph.getLayer(frozenBranchFrozenLayer1).params().dup(); - INDArray frozenLayerParams2 = computationGraph.getLayer(frozenBranchFrozenLayer2).params().dup(); - INDArray frozenOutputLayerParams = computationGraph.getLayer(frozenBranchOutput).params().dup(); + INDArray unfrozenLayerParams = computationGraph.getLayer(frozenBranchUnfrozenLayer0).getParams().dup(); + INDArray frozenLayerParams1 = computationGraph.getLayer(frozenBranchFrozenLayer1).getParams().dup(); + INDArray frozenLayerParams2 = computationGraph.getLayer(frozenBranchFrozenLayer2).getParams().dup(); + INDArray frozenOutputLayerParams = computationGraph.getLayer(frozenBranchOutput).getParams().dup(); for (int i = 0; i < 100; i++) { computationGraph.fit(randomData); } - assertNotEquals(unfrozenLayerParams, computationGraph.getLayer(frozenBranchUnfrozenLayer0).params()); - assertEquals(frozenLayerParams1, computationGraph.getLayer(frozenBranchFrozenLayer1).params()); - assertEquals(frozenLayerParams2, computationGraph.getLayer(frozenBranchFrozenLayer2).params()); - assertEquals(frozenOutputLayerParams, computationGraph.getLayer(frozenBranchOutput).params()); + assertNotEquals(unfrozenLayerParams, computationGraph.getLayer(frozenBranchUnfrozenLayer0).getParams()); + assertEquals(frozenLayerParams1, computationGraph.getLayer(frozenBranchFrozenLayer1).getParams()); + assertEquals(frozenLayerParams2, computationGraph.getLayer(frozenBranchFrozenLayer2).getParams()); + assertEquals(frozenOutputLayerParams, computationGraph.getLayer(frozenBranchOutput).getParams()); } @@ -275,17 +275,17 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { .build(); MultiLayerNetwork frozenNetwork = new MultiLayerNetwork(confFrozen); frozenNetwork.init(); - INDArray unfrozenLayerParams = frozenNetwork.getLayer(0).params().dup(); - INDArray frozenLayerParams1 = frozenNetwork.getLayer(1).params().dup(); - INDArray 
frozenLayerParams2 = frozenNetwork.getLayer(2).params().dup(); - INDArray frozenOutputLayerParams = frozenNetwork.getLayer(3).params().dup(); + INDArray unfrozenLayerParams = frozenNetwork.getLayer(0).getParams().dup(); + INDArray frozenLayerParams1 = frozenNetwork.getLayer(1).getParams().dup(); + INDArray frozenLayerParams2 = frozenNetwork.getLayer(2).getParams().dup(); + INDArray frozenOutputLayerParams = frozenNetwork.getLayer(3).getParams().dup(); MultiLayerNetwork sgdNetwork = new MultiLayerNetwork(confSgd); sgdNetwork.init(); - INDArray unfrozenSgdLayerParams = sgdNetwork.getLayer(0).params().dup(); - INDArray frozenSgdLayerParams1 = sgdNetwork.getLayer(1).params().dup(); - INDArray frozenSgdLayerParams2 = sgdNetwork.getLayer(2).params().dup(); - INDArray frozenSgdOutputLayerParams = sgdNetwork.getLayer(3).params().dup(); + INDArray unfrozenSgdLayerParams = sgdNetwork.getLayer(0).getParams().dup(); + INDArray frozenSgdLayerParams1 = sgdNetwork.getLayer(1).getParams().dup(); + INDArray frozenSgdLayerParams2 = sgdNetwork.getLayer(2).getParams().dup(); + INDArray frozenSgdOutputLayerParams = sgdNetwork.getLayer(3).getParams().dup(); for (int i = 0; i < 100; i++) { frozenNetwork.fit(randomData); @@ -294,10 +294,10 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { sgdNetwork.fit(randomData); } - assertEquals(frozenNetwork.getLayer(0).params(), sgdNetwork.getLayer(0).params()); - assertEquals(frozenNetwork.getLayer(1).params(), sgdNetwork.getLayer(1).params()); - assertEquals(frozenNetwork.getLayer(2).params(), sgdNetwork.getLayer(2).params()); - assertEquals(frozenNetwork.getLayer(3).params(), sgdNetwork.getLayer(3).params()); + assertEquals(frozenNetwork.getLayer(0).getParams(), sgdNetwork.getLayer(0).getParams()); + assertEquals(frozenNetwork.getLayer(1).getParams(), sgdNetwork.getLayer(1).getParams()); + assertEquals(frozenNetwork.getLayer(2).getParams(), sgdNetwork.getLayer(2).getParams()); + assertEquals(frozenNetwork.getLayer(3).getParams(), sgdNetwork.getLayer(3).getParams()); } @@ -360,17 +360,17 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { ComputationGraph frozenComputationGraph = new ComputationGraph(computationGraphConf); frozenComputationGraph.init(); - INDArray unfrozenLayerParams = frozenComputationGraph.getLayer(frozenBranchUnfrozenLayer0).params().dup(); - INDArray frozenLayerParams1 = frozenComputationGraph.getLayer(frozenBranchFrozenLayer1).params().dup(); - INDArray frozenLayerParams2 = frozenComputationGraph.getLayer(frozenBranchFrozenLayer2).params().dup(); - INDArray frozenOutputLayerParams = frozenComputationGraph.getLayer(frozenBranchOutput).params().dup(); + INDArray unfrozenLayerParams = frozenComputationGraph.getLayer(frozenBranchUnfrozenLayer0).getParams().dup(); + INDArray frozenLayerParams1 = frozenComputationGraph.getLayer(frozenBranchFrozenLayer1).getParams().dup(); + INDArray frozenLayerParams2 = frozenComputationGraph.getLayer(frozenBranchFrozenLayer2).getParams().dup(); + INDArray frozenOutputLayerParams = frozenComputationGraph.getLayer(frozenBranchOutput).getParams().dup(); ComputationGraph sgdComputationGraph = new ComputationGraph(computationSgdGraphConf); sgdComputationGraph.init(); - INDArray unfrozenSgdLayerParams = sgdComputationGraph.getLayer(frozenBranchUnfrozenLayer0).params().dup(); - INDArray frozenSgdLayerParams1 = sgdComputationGraph.getLayer(frozenBranchFrozenLayer1).params().dup(); - INDArray frozenSgdLayerParams2 = sgdComputationGraph.getLayer(frozenBranchFrozenLayer2).params().dup(); - INDArray 
frozenSgdOutputLayerParams = sgdComputationGraph.getLayer(frozenBranchOutput).params().dup(); + INDArray unfrozenSgdLayerParams = sgdComputationGraph.getLayer(frozenBranchUnfrozenLayer0).getParams().dup(); + INDArray frozenSgdLayerParams1 = sgdComputationGraph.getLayer(frozenBranchFrozenLayer1).getParams().dup(); + INDArray frozenSgdLayerParams2 = sgdComputationGraph.getLayer(frozenBranchFrozenLayer2).getParams().dup(); + INDArray frozenSgdOutputLayerParams = sgdComputationGraph.getLayer(frozenBranchOutput).getParams().dup(); for (int i = 0; i < 100; i++) { frozenComputationGraph.fit(randomData); @@ -379,10 +379,10 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { sgdComputationGraph.fit(randomData); } - assertEquals(frozenComputationGraph.getLayer(frozenBranchUnfrozenLayer0).params(), sgdComputationGraph.getLayer(frozenBranchUnfrozenLayer0).params()); - assertEquals(frozenComputationGraph.getLayer(frozenBranchFrozenLayer1).params(), sgdComputationGraph.getLayer(frozenBranchFrozenLayer1).params()); - assertEquals(frozenComputationGraph.getLayer(frozenBranchFrozenLayer2).params(), sgdComputationGraph.getLayer(frozenBranchFrozenLayer2).params()); - assertEquals(frozenComputationGraph.getLayer(frozenBranchOutput).params(), sgdComputationGraph.getLayer(frozenBranchOutput).params()); + assertEquals(frozenComputationGraph.getLayer(frozenBranchUnfrozenLayer0).getParams(), sgdComputationGraph.getLayer(frozenBranchUnfrozenLayer0).getParams()); + assertEquals(frozenComputationGraph.getLayer(frozenBranchFrozenLayer1).getParams(), sgdComputationGraph.getLayer(frozenBranchFrozenLayer1).getParams()); + assertEquals(frozenComputationGraph.getLayer(frozenBranchFrozenLayer2).getParams(), sgdComputationGraph.getLayer(frozenBranchFrozenLayer2).getParams()); + assertEquals(frozenComputationGraph.getLayer(frozenBranchOutput).getParams(), sgdComputationGraph.getLayer(frozenBranchOutput).getParams()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java index 0d4f0d710..6e2132d92 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java @@ -68,9 +68,9 @@ public class OutputLayerTest extends BaseDL4JTest { INDArray params = Nd4j.create(1, numParams); OutputLayer l = (OutputLayer) conf.getFlattenedLayerConfigurations().get(0).instantiate(conf, Collections.singletonList(new ScoreIterationListener(1)), 0, params, true, params.dataType()); - params = l.params(); + params = l.getModelParams(); l.setParamsTable(params); - assertEquals(params, l.params()); + assertEquals(params, l.getModelParams()); } @Test @@ -217,8 +217,8 @@ public class OutputLayerTest extends BaseDL4JTest { //However: OutputLayer version has miniBatch*timeSeriesLength "examples" (after reshaping) //RnnOutputLayer has miniBatch examples //Hence: expect difference in scores by factor of timeSeriesLength - double score = mln.score() * timeSeriesLength; - double scoreRNN = mlnRnn.score(); + double score = mln.getScore() * timeSeriesLength; + double scoreRNN = mlnRnn.getScore(); assertFalse(Double.isNaN(score)); assertFalse(Double.isNaN(scoreRNN)); @@ -234,7 +234,7 @@ public class OutputLayerTest extends BaseDL4JTest { RnnOutputLayer rnnol = (RnnOutputLayer) mlnRnn.getOutputLayer(); //assertArrayEquals(rnnol.getInput().shape(),new 
int[]{miniBatchSize,layerSize,timeSeriesLength}); - //Input may be set by BaseLayer methods. Thus input may end up as reshaped 2d version instead of original 3d version. + //Input may be set by BaseLayerConfiguration methods. Thus input may end up as reshaped 2d version instead of original 3d version. //Not ideal, but everything else works. assertArrayEquals(rnnol.getLabels().shape(), new long[] {miniBatchSize, nOut, timeSeriesLength}); @@ -303,7 +303,7 @@ public class OutputLayerTest extends BaseDL4JTest { MultiLayerNetwork mln2 = new MultiLayerNetwork(conf2); mln2.init(); - mln2.setParams(mln.params()); + mln2.setParams(mln.getModelParams()); INDArray in = Nd4j.rand(miniBatchSize, nIn, timeSeriesLength); @@ -330,7 +330,7 @@ public class OutputLayerTest extends BaseDL4JTest { mln2.computeGradientAndScore(); assertEquals(mln.gradient().gradient(), mln2.gradient().gradient()); - assertEquals(mln.score(), mln2.score(), 1e-6); + assertEquals(mln.getScore(), mln2.getScore(), 1e-6); TestUtils.testModelSerialization(mln); } @@ -386,7 +386,7 @@ public class OutputLayerTest extends BaseDL4JTest { mln2.init(); - mln2.setParams(mln.params()); + mln2.setParams(mln.getModelParams()); INDArray in = Nd4j.rand(3, 3, 5, 5); @@ -407,7 +407,7 @@ public class OutputLayerTest extends BaseDL4JTest { mln.computeGradientAndScore(); mln2.computeGradientAndScore(); - assertEquals(mln.score(), mln2.score(), 1e-6); + assertEquals(mln.getScore(), mln2.getScore(), 1e-6); assertEquals(mln.gradient().gradient(), mln2.gradient().gradient()); //Also check computeScoreForExamples @@ -479,7 +479,7 @@ public class OutputLayerTest extends BaseDL4JTest { graph2.init(); - graph2.setParams(graph.params()); + graph2.setParams(graph.getModelParams()); INDArray in = Nd4j.rand(3, 3, 5, 5); @@ -500,7 +500,7 @@ public class OutputLayerTest extends BaseDL4JTest { graph.computeGradientAndScore(); graph2.computeGradientAndScore(); - assertEquals(graph.score(), graph2.score(), 1e-6); + assertEquals(graph.getScore(), graph2.getScore(), 1e-6); assertEquals(graph.gradient().gradient(), graph2.gradient().gradient()); //Also check computeScoreForExamples diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java index 6306c333b..f1e64f204 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java @@ -59,13 +59,13 @@ public class SeedTest extends BaseDL4JTest { layer.fit(data.getFeatures(), LayerWorkspaceMgr.noWorkspaces()); layer.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - double score = layer.score(); - INDArray parameters = layer.params(); + double score = layer.getScore(); + INDArray parameters = layer.getParams(); layer.setParams(parameters); layer.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - double score2 = layer.score(); - assertEquals(parameters, layer.params()); + double score2 = layer.getScore(); + assertEquals(parameters, layer.getParams()); assertEquals(score, score2, 1e-4); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java index 44ee236c8..6f24ff226 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java @@ -845,9 +845,9 @@ public class ConvDataFormatTests extends BaseDL4JTest { public static void testHelper(TestCase tc) { - tc.net2.params().assign(tc.net1.params()); - tc.net3.params().assign(tc.net1.params()); - tc.net4.params().assign(tc.net1.params()); + tc.net2.getModelParams().assign(tc.net1.getModelParams()); + tc.net3.getModelParams().assign(tc.net1.getModelParams()); + tc.net4.getModelParams().assign(tc.net1.getModelParams()); //Test forward pass: INDArray inNCHW = tc.inNCHW; @@ -909,9 +909,9 @@ public class ConvDataFormatTests extends BaseDL4JTest { tc.net3.fit(inNHWC, tc.labelsNHWC); tc.net4.fit(inNHWC, tc.labelsNHWC); - assertEquals(tc.net1.params(), tc.net2.params(), tc.msg); - assertEquals(tc.net1.params(), tc.net3.params(), tc.msg); - assertEquals(tc.net1.params(), tc.net4.params(), tc.msg); + assertEquals(tc.net1.getModelParams(), tc.net2.getModelParams(), tc.msg); + assertEquals(tc.net1.getModelParams(), tc.net3.getModelParams(), tc.msg); + assertEquals(tc.net1.getModelParams(), tc.net4.getModelParams(), tc.msg); //Test serialization MultiLayerNetwork net1a = TestUtils.testModelSerialization(tc.net1); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java index f234d3b78..4b5458b15 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java @@ -30,7 +30,6 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; @@ -38,7 +37,6 @@ import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.nn.weights.WeightInitNormal; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; @@ -450,10 +448,10 @@ public class ConvolutionLayerTest extends BaseDL4JTest { MultiLayerNetwork net = getCNNMLNConfig(true, false); - INDArray paramsOrig = net.params().dup(); + INDArray paramsOrig = net.getModelParams().dup(); net.setParams(paramsOrig); - INDArray params2 = net.params(); + INDArray params2 = net.getModelParams(); assertEquals(paramsOrig, params2); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java index 4ef8fab18..75e48861d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java @@ -154,7 +154,7 @@ public class TestCustomLayers extends BaseDL4JTest { MultiLayerNetwork net2 = new 
MultiLayerNetwork(conf2); net2.init(); - assertEquals(net2.params(), net.params()); + assertEquals(net2.getModelParams(), net.getModelParams()); INDArray testFeatures = Nd4j.rand(1, 10); INDArray testLabels = Nd4j.zeros(1, 10); @@ -207,7 +207,7 @@ public class TestCustomLayers extends BaseDL4JTest { ComputationGraph net2 = new ComputationGraph(conf2); net2.init(); - assertEquals(net2.params(), net.params()); + assertEquals(net2.getModelParams(), net.getModelParams()); INDArray testFeatures = Nd4j.rand(1, 10); INDArray testLabels = Nd4j.zeros(1, 10); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomLayer.java index ea59a8091..4fafcde0b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomLayer.java @@ -56,7 +56,7 @@ public class CustomLayer extends FeedForwardLayer { boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); CustomLayerImpl ret = new CustomLayerImpl(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java index 80c983589..350f24f4b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java @@ -54,7 +54,7 @@ public class CustomOutputLayer extends BaseOutputLayer { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); CustomOutputLayerImpl ret = new CustomOutputLayerImpl(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java index ba1129cef..01cc7f2dd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java @@ -72,7 +72,7 @@ public class DenseTest extends BaseDL4JTest { DataSet test = iter.next(); - assertEquals(model.params(), model2.params()); + assertEquals(model.getModelParams(), model2.getModelParams()); Evaluation eval = new Evaluation(); INDArray output = model.output(test.getFeatures()); @@ -99,7 +99,7 @@ public class DenseTest extends BaseDL4JTest { DataSet test = iter.next(); - assertEquals(model.params(), model2.params()); + assertEquals(model.getModelParams(), 
model2.getModelParams()); Evaluation eval = new Evaluation(); INDArray output = model.output(test.getFeatures()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java index 60c4e3b0d..742f38a2d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java @@ -169,7 +169,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net.init(); net2.init(); - net2.setParams(net.params().dup()); + net2.setParams(net.getModelParams().dup()); int batchSize = 3; INDArray inEmbedding = Nd4j.create(batchSize, 1); @@ -216,7 +216,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net.init(); net2.init(); - net2.setParams(net.params().dup()); + net2.setParams(net.getModelParams().dup()); int batchSize = 3; INDArray inEmbedding = Nd4j.create(batchSize, 1); @@ -262,7 +262,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net.init(); net2.init(); - net2.setParams(net.params().dup()); + net2.setParams(net.getModelParams().dup()); int batchSize = 3; INDArray inEmbedding = Nd4j.create(batchSize, 1); @@ -287,7 +287,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net.computeGradientAndScore(); net2.computeGradientAndScore(); - assertEquals(net2.score(), net.score(), 1e-6); + assertEquals(net2.getScore(), net.getScore(), 1e-6); Map gradient = net.gradient().gradientForVariable(); Map gradient2 = net2.gradient().gradientForVariable(); @@ -323,7 +323,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net.init(); net2.init(); - net2.setParams(net.params().dup()); + net2.setParams(net.getModelParams().dup()); int batchSize = 3; INDArray inEmbedding = Nd4j.create(batchSize, 1); @@ -349,7 +349,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net2.computeGradientAndScore(); // System.out.println(net.score() + "\t" + net2.score()); - assertEquals(net2.score(), net.score(), 1e-6); + assertEquals(net2.getScore(), net.getScore(), 1e-6); Map gradient = net.gradient().gradientForVariable(); Map gradient2 = net2.gradient().gradientForVariable(); @@ -395,7 +395,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net.init(); net2.init(); - net2.setParams(net.params().dup()); + net2.setParams(net.getModelParams().dup()); INDArray inEmbedding = Nd4j.create(batchSize, 1, timeSeriesLength); INDArray inOneHot = Nd4j.create(batchSize, nClassesIn, timeSeriesLength); @@ -422,7 +422,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net2.computeGradientAndScore(); // System.out.println(net.score() + "\t" + net2.score()); - assertEquals(net2.score(), net.score(), 1e-5); + assertEquals(net2.getScore(), net.getScore(), 1e-5); Map gradient = net.gradient().gradientForVariable(); Map gradient2 = net2.gradient().gradientForVariable(); @@ -484,7 +484,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - net2.setParams(net.params().dup()); + net2.setParams(net.getModelParams().dup()); INDArray inEmbedding = Nd4j.zeros(nExamples, 1, timeSeriesLength); INDArray inDense = Nd4j.zeros(nExamples, numInputClasses, timeSeriesLength); @@ -523,7 +523,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net2.computeGradientAndScore(); // 
System.out.println(net.score() + "\t" + net2.score()); - assertEquals(net2.score(), net.score(), 1e-5); + assertEquals(net2.getScore(), net.getScore(), 1e-5); Map gradients = net.gradient().gradientForVariable(); Map gradients2 = net2.gradient().gradientForVariable(); @@ -640,7 +640,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - net2.setParams(net.params().dup()); + net2.setParams(net.getModelParams().dup()); INDArray inEmbedding = Nd4j.zeros(inLabelDtype, inputRank == 2 ? new long[]{nExamples, timeSeriesLength} : new long[]{nExamples, 1, timeSeriesLength}); INDArray inDense = Nd4j.zeros(inLabelDtype, nExamples, numInputClasses, timeSeriesLength); @@ -678,7 +678,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net.computeGradientAndScore(); net2.computeGradientAndScore(); - assertEquals(net2.score(), net.score(), 1e-5); + assertEquals(net2.getScore(), net.getScore(), 1e-5); Map gradients = net.gradient().gradientForVariable(); Map gradients2 = net2.gradient().gradientForVariable(); @@ -777,9 +777,9 @@ public class EmbeddingLayerTest extends BaseDL4JTest { MultiLayerNetwork net3 = new MultiLayerNetwork(conf3); net3.init(); - INDArray p1 = net.params(); - INDArray p2 = net2.params(); - INDArray p3 = net3.params(); + INDArray p1 = net.getModelParams(); + INDArray p2 = net2.getModelParams(); + INDArray p3 = net3.getModelParams(); boolean eq = p1.equalsWithEps(p2, 1e-4); String str = (seq ? "EmbeddingSequenceLayer" : "EmbeddingLayer") + " - " + wi; assertTrue(eq, str + " p1/p2 params not equal"); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java index 558041072..5ef6fb110 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java @@ -514,7 +514,7 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - net.setListeners(new ScoreIterationListener(100)); + net.addTrainingListeners(new ScoreIterationListener(100)); int nEpochs = 1000; DataSet ds = iter.next(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java index c0f6fa24c..a52716589 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java @@ -79,13 +79,13 @@ public class OCNNOutputLayerTest extends BaseDL4JTest { if (doLearningFirst) { //Run a number of iterations of learning network.setInput(arr); - network.setListeners(new ScoreIterationListener(1)); + network.addTrainingListeners(new ScoreIterationListener(1)); network.computeGradientAndScore(); - double scoreBefore = network.score(); + double scoreBefore = network.getScore(); for (int j = 0; j < 10; j++) network.fit(ds); network.computeGradientAndScore(); - double scoreAfter = network.score(); + double scoreAfter = network.getScore(); //Can't test in 'characteristic mode of operation' if not learning String msg = "testLayer() - score did not (sufficiently) decrease during learning - 
activationFn=" + "relu" + ", lossFn=" + "ocnn" + ", " + "sigmoid" @@ -147,7 +147,7 @@ public class OCNNOutputLayerTest extends BaseDL4JTest { tmpFile.deleteOnExit(); MultiLayerNetwork multiLayerNetwork = ModelSerializer.restoreMultiLayerNetwork(tmpFile); - assertEquals(network.params(),multiLayerNetwork.params()); + assertEquals(network.getModelParams(),multiLayerNetwork.getModelParams()); assertEquals(network.numParams(),multiLayerNetwork.numParams()); } @@ -187,7 +187,7 @@ public class OCNNOutputLayerTest extends BaseDL4JTest { .build(); MultiLayerNetwork network = new MultiLayerNetwork(configuration); network.init(); - network.setListeners(new ScoreIterationListener(1)); + network.addTrainingListeners(new ScoreIterationListener(1)); return network; } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java index 8e329077c..101a55edb 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java @@ -124,7 +124,7 @@ public class BidirectionalTest extends BaseDL4JTest { assertEquals(n1, n2); } - net2.setParams(net1.params()); //Assuming exact same layout here... + net2.setParams(net1.getModelParams()); //Assuming exact same layout here... INDArray in; if (rnnDataFormat == NCW){ @@ -154,7 +154,7 @@ public class BidirectionalTest extends BaseDL4JTest { net2.computeGradientAndScore(); //Ensure scores are equal: - assertEquals(net1.score(), net2.score(), 1e-6); + assertEquals(net1.getScore(), net2.getScore(), 1e-6); //Ensure gradients are equal: Gradient g1 = net1.gradient(); @@ -174,8 +174,8 @@ public class BidirectionalTest extends BaseDL4JTest { net1.fit(in, labels); net2.fit(in, labels); - INDArray p1 = net1.params(); - INDArray p2 = net2.params(); + INDArray p1 = net1.getModelParams(); + INDArray p2 = net2.getModelParams(); assertEquals(p1, p2); } } @@ -232,7 +232,7 @@ public class BidirectionalTest extends BaseDL4JTest { assertEquals(n1, n2); } - net2.setParams(net1.params()); //Assuming exact same layout here... + net2.setParams(net1.getModelParams()); //Assuming exact same layout here... 
INDArray in = Nd4j.rand(3, 10, 5); @@ -253,7 +253,7 @@ public class BidirectionalTest extends BaseDL4JTest { net2.computeGradientAndScore(); //Ensure scores are equal: - assertEquals(net1.score(), net2.score(), 1e-6); + assertEquals(net1.getScore(), net2.getScore(), 1e-6); //Ensure gradients are equal: Gradient g1 = net1.gradient(); @@ -273,8 +273,8 @@ public class BidirectionalTest extends BaseDL4JTest { net1.fit(new DataSet(in, labels)); net2.fit(new DataSet(in, labels)); - INDArray p1 = net1.params(); - INDArray p2 = net2.params(); + INDArray p1 = net1.getModelParams(); + INDArray p2 = net2.getModelParams(); assertEquals(p1, p2); } } @@ -340,7 +340,7 @@ public class BidirectionalTest extends BaseDL4JTest { net1.computeGradientAndScore(); net2.computeGradientAndScore(); - assertEquals(net1.score(), net2.score(), 1e-6); + assertEquals(net1.getScore(), net2.getScore(), 1e-6); assertEquals(net1.gradient().gradient(), net2.gradient().gradient()); } } @@ -403,7 +403,7 @@ public class BidirectionalTest extends BaseDL4JTest { net1.computeGradientAndScore(); net2.computeGradientAndScore(); - assertEquals(net1.score(), net2.score(), 1e-6); + assertEquals(net1.getScore(), net2.getScore(), 1e-6); assertEquals(net1.gradient().gradient(), net2.gradient().gradient()); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java index 7d8dd8977..be04304b6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java @@ -277,7 +277,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { final INDArray act1 = bidirectionalLSTM.activate(sig, false, LayerWorkspaceMgr.noWorkspaces()); - params = bidirectionalLSTM.params(); + params = bidirectionalLSTM.getModelParams(); bidirectionalLSTM.setParamsTable(params); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java index c6b315cb5..93a60f38c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java @@ -285,9 +285,9 @@ public class RnnDataFormatTests extends BaseDL4JTest { public static void testHelper(TestCase tc) { - tc.net2.params().assign(tc.net1.params()); - tc.net3.params().assign(tc.net1.params()); - tc.net4.params().assign(tc.net1.params()); + tc.net2.getModelParams().assign(tc.net1.getModelParams()); + tc.net3.getModelParams().assign(tc.net1.getModelParams()); + tc.net4.getModelParams().assign(tc.net1.getModelParams()); INDArray inNCW = tc.inNCW; INDArray inNWC = tc.inNCW.permute(0, 2, 1).dup(); @@ -352,9 +352,9 @@ public class RnnDataFormatTests extends BaseDL4JTest { tc.net3.fit(inNWC, tc.labelsNWC); tc.net4.fit(inNWC, tc.labelsNWC); - assertEquals(tc.net1.params(), tc.net2.params(), tc.msg); - assertEquals(tc.net1.params(), tc.net3.params(), tc.msg); - assertEquals(tc.net1.params(), tc.net4.params(), tc.msg); + assertEquals(tc.net1.getModelParams(), tc.net2.getModelParams(), tc.msg); + assertEquals(tc.net1.getModelParams(), tc.net3.getModelParams(), tc.msg); + 
assertEquals(tc.net1.getModelParams(), tc.net4.getModelParams(), tc.msg); //Test serialization MultiLayerNetwork net1a = TestUtils.testModelSerialization(tc.net1); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java index e2b6bc359..d6e0369d4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.layers.recurrent; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.dropout.TestDropout; import org.deeplearning4j.nn.conf.layers.GravesLSTM; @@ -173,8 +172,8 @@ public class TestRnnLayers extends BaseDL4JTest { MultiLayerNetwork netD2 = new MultiLayerNetwork(confD2); netD2.init(); - assertEquals(net.params(), netD.params(), s); - assertEquals(net.params(), netD2.params(), s); + assertEquals(net.getModelParams(), netD.getModelParams(), s); + assertEquals(net.getModelParams(), netD2.getModelParams(), s); INDArray f = Nd4j.rand(DataType.FLOAT, 3, 10, 10); @@ -193,7 +192,7 @@ public class TestRnnLayers extends BaseDL4JTest { INDArray l = TestUtils.randomOneHotTimeSeries(3, 10, 10, 12345); net.fit(f.dup(), l); netD.fit(f.dup(), l); - assertNotEquals(net.params(), netD.params(), s); + assertNotEquals(net.getModelParams(), netD.getModelParams(), s); netD2.fit(f.dup(), l); netD2.fit(f.dup(), l); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java index 5a31cf4df..ec8008379 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java @@ -115,7 +115,7 @@ public class TestTimeDistributed extends BaseDL4JTest { net1.fit(ds); net2.fit(ds); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); MultiLayerNetwork net3 = TestUtils.testModelSerialization(net2); out2 = net2.output(in); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java index 60446d43f..93d9421c3 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java @@ -124,10 +124,10 @@ public class TestSameDiffDense extends BaseDL4JTest { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - net.params().assign(net2.params()); + net.getModelParams().assign(net2.getModelParams()); //Check params: - assertEquals(net2.params(), net.params()); + assertEquals(net2.getModelParams(), net.getModelParams()); Map params1 = net.getParamTable(); Map params2 = net2.getParamTable(); assertEquals(params2, params1); @@ -209,10 +209,10 @@ public class 
TestSameDiffDense extends BaseDL4JTest { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - assertEquals(net2.params(), net.params()); + assertEquals(net2.getModelParams(), net.getModelParams()); //Check params: - assertEquals(net2.params(), net.params()); + assertEquals(net2.getModelParams(), net.getModelParams()); Map params1 = net.getParamTable(); Map params2 = net2.getParamTable(); assertEquals(params2, params1); @@ -287,10 +287,10 @@ public class TestSameDiffDense extends BaseDL4JTest { MultiLayerNetwork netStandard = new MultiLayerNetwork(conf2); netStandard.init(); - netSD.params().assign(netStandard.params()); + netSD.getModelParams().assign(netStandard.getModelParams()); //Check params: - assertEquals(netStandard.params(), netSD.params()); + assertEquals(netStandard.getModelParams(), netSD.getModelParams()); assertEquals(netStandard.getParamTable(), netSD.getParamTable()); INDArray in = Nd4j.rand(minibatch, nIn); @@ -379,10 +379,10 @@ public class TestSameDiffDense extends BaseDL4JTest { MultiLayerNetwork netStandard = new MultiLayerNetwork(conf2); netStandard.init(); - netSD.params().assign(netStandard.params()); + netSD.getModelParams().assign(netStandard.getModelParams()); //Check params: - assertEquals(netStandard.params(), netSD.params()); + assertEquals(netStandard.getModelParams(), netSD.getModelParams()); assertEquals(netStandard.getParamTable(), netSD.getParamTable()); DataSetIterator iter = new IrisDataSetIterator(150, 150); @@ -398,7 +398,7 @@ public class TestSameDiffDense extends BaseDL4JTest { netStandard.fit(ds); String s = String.valueOf(i); assertEquals( netStandard.getFlattenedGradients(), netSD.getFlattenedGradients(), s); - assertEquals( netStandard.params(), netSD.params(), s); + assertEquals( netStandard.getModelParams(), netSD.getModelParams(), s); assertEquals( netStandard.getUpdater().getStateViewArray(), netSD.getUpdater().getStateViewArray(), s); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java index 5e67862ff..5fd371d13 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java @@ -100,10 +100,10 @@ public class TestSameDiffDenseVertex extends BaseDL4JTest { ComputationGraph netStandard = new ComputationGraph(conf2); netStandard.init(); - netSD.params().assign(netStandard.params()); + netSD.getModelParams().assign(netStandard.getModelParams()); //Check params: - assertEquals(netStandard.params(), netSD.params()); + assertEquals(netStandard.getModelParams(), netSD.getModelParams()); assertEquals(netStandard.getParamTable(), netSD.getParamTable()); INDArray in = Nd4j.rand(minibatch, nIn); @@ -160,7 +160,7 @@ public class TestSameDiffDenseVertex extends BaseDL4JTest { netStandard.fit(ds); assertEquals(netStandard.getParamTable(), netSD.getParamTable()); - assertEquals(netStandard.params(), netSD.params()); + assertEquals(netStandard.getModelParams(), netSD.getModelParams()); assertEquals(netStandard.getFlattenedGradients(), netSD.getFlattenedGradients()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java index 
8da331f8e..1514a6709 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java @@ -98,7 +98,7 @@ public class TestSameDiffLambda extends BaseDL4JTest { ComputationGraph std = new ComputationGraph(confStd); std.init(); - lambda.setParams(std.params()); + lambda.setParams(std.getModelParams()); INDArray in = Nd4j.rand(3, 5); INDArray labels = TestUtils.randomOneHot(3, 5); @@ -119,7 +119,7 @@ public class TestSameDiffLambda extends BaseDL4JTest { std.fit(ds); String s = String.valueOf(i); - assertEquals(std.params(), lambda.params(), s); + assertEquals(std.getModelParams(), lambda.getModelParams(), s); assertEquals(std.getFlattenedGradients(), lambda.getFlattenedGradients(), s); } @@ -182,7 +182,7 @@ public class TestSameDiffLambda extends BaseDL4JTest { ComputationGraph std = new ComputationGraph(confStd); std.init(); - lambda.setParams(std.params()); + lambda.setParams(std.getModelParams()); INDArray in1 = Nd4j.rand(3, 5); INDArray in2 = Nd4j.rand(3, 5); @@ -204,7 +204,7 @@ public class TestSameDiffLambda extends BaseDL4JTest { std.fit(mds); String s = String.valueOf(i); - assertEquals(std.params(), lambda.params(), s); + assertEquals(std.getModelParams(), lambda.getModelParams(), s); assertEquals(std.getFlattenedGradients(), lambda.getFlattenedGradients(), s); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java index 8ff1d6bc9..0a3d2f915 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java @@ -85,7 +85,7 @@ public class TestSameDiffOutput extends BaseDL4JTest { netSD.fit(ds); netStd.fit(ds); - assertEquals(netStd.params(), netSD.params()); + assertEquals(netStd.getModelParams(), netSD.getModelParams()); assertEquals(netStd.getFlattenedGradients(), netSD.getFlattenedGradients()); } @@ -131,7 +131,7 @@ public class TestSameDiffOutput extends BaseDL4JTest { MultiLayerNetwork netStd = new MultiLayerNetwork(confStd); netStd.init(); - netSD.params().assign(netStd.params()); + netSD.getModelParams().assign(netStd.getModelParams()); assertEquals(netStd.getParamTable(), netSD.getParamTable()); @@ -165,7 +165,7 @@ public class TestSameDiffOutput extends BaseDL4JTest { netSD.fit(ds); netStd.fit(ds); String s = String.valueOf(i); - assertEquals( netStd.params(), netSD.params(), s); + assertEquals( netStd.getModelParams(), netSD.getModelParams(), s); assertEquals( netStd.getFlattenedGradients(), netSD.getFlattenedGradients(),s ); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java index 639520492..b7c89e007 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java @@ -77,7 +77,7 @@ public class TestVAE extends BaseDL4JTest { net.init(); System.out.println("Exp num params: " + expNumParams); - assertEquals(expNumParams, net.getLayer(0).params().length()); + assertEquals(expNumParams, net.getLayer(0).getParams().length()); 
Map paramTable = net.getLayer(0).getParamTable(); int count = 0; for (INDArray arr : paramTable.values()) { diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java index 0b8b1877d..5ed2a9c2b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java @@ -79,7 +79,7 @@ public class CloseNetworkTests extends BaseDL4JTest { net.close(); - assertTrue(net.params().wasClosed()); + assertTrue(net.getModelParams().wasClosed()); if(train) { assertTrue(net.getGradientsViewArray().wasClosed()); Updater u = net.getUpdater(false); @@ -127,7 +127,7 @@ public class CloseNetworkTests extends BaseDL4JTest { net.close(); - assertTrue(net.params().wasClosed()); + assertTrue(net.getModelParams().wasClosed()); if(train) { assertTrue(net.getGradientsViewArray().wasClosed()); Updater u = net.getUpdater(false); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java index 09dfb45ea..052e1fa07 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java @@ -57,7 +57,7 @@ public class LargeNetTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - INDArray params = net.params(); + INDArray params = net.getModelParams(); long paramsLength = params.length(); long expParamsLength = 10_000_000L * 300 + 300 * 10 + 10; assertEquals(expParamsLength, paramsLength); @@ -91,7 +91,7 @@ public class LargeNetTest extends BaseDL4JTest { ComputationGraph net = new ComputationGraph(conf); net.init(); - INDArray params = net.params(); + INDArray params = net.getModelParams(); long paramsLength = params.length(); long expParamsLength = 10_000_000L * 300 + 300 * 10 + 10; assertEquals(expParamsLength, paramsLength); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java index f6ddd312c..69099f0a0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java @@ -76,7 +76,7 @@ public class TestLrChanges extends BaseDL4JTest { net2.init(); net2.getUpdater().getStateViewArray().assign(net.getUpdater().getStateViewArray()); conf2.setIterationCount(conf.getIterationCount()); - net2.setParams(net.params().dup()); + net2.setParams(net.getModelParams().dup()); assertEquals(0.1, net.getLearningRate(0).doubleValue(), 0.0); net.setLearningRate(0, 0.5); //Set LR for layer 0 to 0.5 @@ -96,7 +96,7 @@ public class TestLrChanges extends BaseDL4JTest { net2.fit(in, l); } - assertEquals(net.params(), net2.params()); + assertEquals(net.getModelParams(), net2.getModelParams()); assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray()); INDArray in1 = Nd4j.rand(10, 10); @@ -110,7 +110,7 @@ public class TestLrChanges extends BaseDL4JTest { net2.setLabels(l1); net2.computeGradientAndScore(); - assertEquals(net.score(), net2.score(), 1e-8); + assertEquals(net.getScore(), net2.getScore(), 1e-8); //Now: Set *all* LRs to 
say 0.3... @@ -126,7 +126,7 @@ public class TestLrChanges extends BaseDL4JTest { net3.init(); net3.getUpdater().getStateViewArray().assign(net.getUpdater().getStateViewArray()); conf3.setIterationCount(conf.getIterationCount()); - net3.setParams(net.params().dup()); + net3.setParams(net.getModelParams().dup()); net.setLearningRate(0.3); @@ -139,7 +139,7 @@ public class TestLrChanges extends BaseDL4JTest { net3.fit(in, l); } - assertEquals(net.params(), net3.params()); + assertEquals(net.getModelParams(), net3.getModelParams()); assertEquals(net.getUpdater().getStateViewArray(), net3.getUpdater().getStateViewArray()); } @@ -206,7 +206,7 @@ public class TestLrChanges extends BaseDL4JTest { net2.init(); net2.getUpdater().getStateViewArray().assign(net.getUpdater().getStateViewArray()); conf2.setIterationCount(conf.getIterationCount()); - net2.setParams(net.params().dup()); + net2.setParams(net.getModelParams().dup()); net.setLearningRate(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8 )); //Set LR for layer 0 to 0.5 @@ -224,7 +224,7 @@ public class TestLrChanges extends BaseDL4JTest { net2.fit(in, l); } - assertEquals(net.params(), net2.params()); + assertEquals(net.getModelParams(), net2.getModelParams()); assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray()); } @@ -270,7 +270,7 @@ public class TestLrChanges extends BaseDL4JTest { net2.init(); net2.getUpdater().getStateViewArray().assign(net.getUpdater().getStateViewArray()); conf2.setIterationCount(conf.getIterationCount()); - net2.setParams(net.params().dup()); + net2.setParams(net.getModelParams().dup()); assertEquals(0.1, net.getLearningRate("0").doubleValue(), 0.0); net.setLearningRate("0", 0.5); //Set LR for layer 0 to 0.5 @@ -290,7 +290,7 @@ public class TestLrChanges extends BaseDL4JTest { net2.fit(new DataSet(in, l)); } - assertEquals(net.params(), net2.params()); + assertEquals(net.getModelParams(), net2.getModelParams()); assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray()); INDArray in1 = Nd4j.rand(10, 10); @@ -304,7 +304,7 @@ public class TestLrChanges extends BaseDL4JTest { net2.setLabels(l1); net2.computeGradientAndScore(); - assertEquals(net.score(), net2.score(), 1e-8); + assertEquals(net.getScore(), net2.getScore(), 1e-8); //Now: Set *all* LRs to say 0.3... 
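// Illustrative sketch (not part of the patch): the hunks above and below all apply the
// same accessor rename used throughout this changeset. Assuming an already-initialized
// MultiLayerNetwork in this fork of the API, the mapping is:
//   net.params()             -> net.getModelParams()
//   net.score()              -> net.getScore()
//   net.setListeners(...)    -> net.addTrainingListeners(...)
//   net.getLayer(i).params() -> net.getLayer(i).getParams()
// A minimal, hypothetical helper using only the renamed calls seen in these hunks:
private static void cloneStateAndTrack(MultiLayerNetwork from, MultiLayerNetwork to) {
    to.setParams(from.getModelParams().dup());                 // was: from.params().dup()
    to.addTrainingListeners(new ScoreIterationListener(10));   // was: to.setListeners(...)
    System.out.println("current score = " + from.getScore());  // was: from.score()
}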
@@ -320,7 +320,7 @@ public class TestLrChanges extends BaseDL4JTest { net3.init(); net3.getUpdater().getStateViewArray().assign(net.getUpdater().getStateViewArray()); conf3.setIterationCount(conf.getIterationCount()); - net3.setParams(net.params().dup()); + net3.setParams(net.getModelParams().dup()); net.setLearningRate(0.3); @@ -333,7 +333,7 @@ public class TestLrChanges extends BaseDL4JTest { net3.fit(new DataSet(in, l)); } - assertEquals(net.params(), net3.params()); + assertEquals(net.getModelParams(), net3.getModelParams()); assertEquals(net.getUpdater().getStateViewArray(), net3.getUpdater().getStateViewArray()); } @@ -375,7 +375,7 @@ public class TestLrChanges extends BaseDL4JTest { net2.init(); net2.getUpdater().getStateViewArray().assign(net.getUpdater().getStateViewArray()); conf2.setIterationCount(conf.getIterationCount()); - net2.setParams(net.params().dup()); + net2.setParams(net.getModelParams().dup()); net.setLearningRate(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8 )); //Set LR for layer 0 to 0.5 @@ -393,7 +393,7 @@ public class TestLrChanges extends BaseDL4JTest { net2.fit(new DataSet(in, l)); } - assertEquals(net.params(), net2.params()); + assertEquals(net.getModelParams(), net2.getModelParams()); assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java index fdfb16fcd..01278db4e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java @@ -77,14 +77,14 @@ public class TestNetConversion extends BaseDL4JTest { n.computeGradientAndScore(); cg.computeGradientAndScore(); - assertEquals(n.score(), cg.score(), 1e-6); + assertEquals(n.getScore(), cg.getScore(), 1e-6); assertEquals(n.gradient().gradient(), cg.gradient().gradient()); n.fit(in, labels); cg.fit(new INDArray[]{in}, new INDArray[]{labels}); - assertEquals(n.params(), cg.params()); + assertEquals(n.getModelParams(), cg.getModelParams()); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java index 794b45411..904dd845b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java @@ -476,7 +476,7 @@ public class WorkspaceTests extends BaseDL4JTest { final ComputationGraph computationGraph = new ComputationGraph(config); computationGraph.init(); - computationGraph.setListeners(new ScoreIterationListener(3)); + computationGraph.addTrainingListeners(new ScoreIterationListener(3)); WSTestDataSetIterator iterator = new WSTestDataSetIterator(); computationGraph.fit(iterator); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java index 27efa9149..c818f2281 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java @@ -54,7 +54,7 @@ public class BackPropMLPTest extends BaseDL4JTest { public void 
testMLPTrivial() { //Simplest possible case: 1 hidden layer, 1 hidden neuron, batch size of 1. MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(new int[] {1}, Activation.SIGMOID)); - network.setListeners(new ScoreIterationListener(1)); + network.addTrainingListeners(new ScoreIterationListener(1)); network.init(); DataSetIterator iter = new IrisDataSetIterator(1, 10); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java index 4fb1c3fad..ac1626eda 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java @@ -64,7 +64,7 @@ import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ActivationLayer; import org.deeplearning4j.nn.conf.layers.AutoEncoder; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.BatchNormalization; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -184,13 +184,13 @@ public class MultiLayerTest extends BaseDL4JTest { MultiLayerNetwork network3 = new MultiLayerNetwork(conf); network3.init(); - INDArray params = network3.params(); + INDArray params = network3.getModelParams(); INDArray weights = network3.getLayer(0).getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); INDArray bias = network3.getLayer(0).getParam(DefaultParamInitializer.BIAS_KEY).dup(); network3.setParameters(params); assertEquals(weights, network3.getLayer(0).getParam(DefaultParamInitializer.WEIGHT_KEY)); assertEquals(bias, network3.getLayer(0).getParam(DefaultParamInitializer.BIAS_KEY)); - INDArray params4 = network3.params(); + INDArray params4 = network3.getModelParams(); assertEquals(params, params4); } @@ -211,7 +211,7 @@ public class MultiLayerTest extends BaseDL4JTest { MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); - network.setListeners(new ScoreIterationListener(1)); + network.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator iter = new IrisDataSetIterator(150, 150); @@ -242,7 +242,7 @@ public class MultiLayerTest extends BaseDL4JTest { MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); - network.setListeners(new ScoreIterationListener(1)); + network.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator iter = new IrisDataSetIterator(150, 150); @@ -330,7 +330,7 @@ public class MultiLayerTest extends BaseDL4JTest { MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); - model.addListeners(new ScoreIterationListener(listenerFreq)); + model.addTrainingListeners(new ScoreIterationListener(listenerFreq)); log.info("Train model...."); int cnt = 0; @@ -503,7 +503,7 @@ public class MultiLayerTest extends BaseDL4JTest { assertEquals(layerNameList.get(0), net.getLayer(0).getLayerConfiguration().getLayerName()); assertEquals(layerNameList, net.getLayerNames()); - BaseLayer b = (BaseLayer) net.getLayer(layerNameList.get(2)).getLayerConfiguration(); + BaseLayerConfiguration b = (BaseLayerConfiguration) net.getLayer(layerNameList.get(2)).getLayerConfiguration(); assertEquals("softmax", b.getActivationFn().toString()); } @@ -535,7 +535,7 
@@ public class MultiLayerTest extends BaseDL4JTest { MultiLayerNetwork netNoReg = new MultiLayerNetwork(confNoReg); netNoReg.init(); - netNoReg.setParameters(net.params().dup()); + netNoReg.setParameters(net.getModelParams().dup()); //Score single example, and compare to scoreExamples: INDArray input = Nd4j.rand(3, nIn); @@ -703,7 +703,7 @@ public class MultiLayerTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.fit(iter.next()); - // TODO validate actual layer gradientView - issue getting var out of BaseLayer w/o adding MLN getter that gets confused with local gradient vars + // TODO validate actual layer gradientView - issue getting var out of BaseLayerConfiguration w/o adding MLN getter that gets confused with local gradient vars Gradient actualGradient = net.gradient; assertNotEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W")); @@ -716,13 +716,13 @@ public class MultiLayerTest extends BaseDL4JTest { net.setParam("0_b", Nd4j.ones(1, 5)); net.setParam("1_W", Nd4j.ones(5, 3)); net.setParam("1_b", Nd4j.ones(1, 3)); - INDArray actualParams = net.params(); + INDArray actualParams = net.getModelParams(); // Confirm params assertEquals(expectedGradient.gradient(), actualParams); net.update(expectedGradient); - actualParams = net.params(); + actualParams = net.getModelParams(); assertEquals(Nd4j.ones(1, 43).addi(1), actualParams); } @@ -762,7 +762,7 @@ public class MultiLayerTest extends BaseDL4JTest { MultiLayerNetwork aePre = getAeModel(true, nIn, nOut); int actualNP = (int) aePre.numParams(); assertEquals(2 * (nIn * nOut + nOut) + nIn, actualNP); - INDArray params = aePre.params(); + INDArray params = aePre.getModelParams(); assertEquals(params.length(), actualNP); // check num params Map paramTable = aePre.getParamTable(); assertTrue(paramTable.containsKey("0_vb")); // check vb exists for pretrain layer @@ -774,7 +774,7 @@ public class MultiLayerTest extends BaseDL4JTest { MultiLayerNetwork aeNoPre = getAeModel(false, nIn, nOut); actualNP = (int) aeNoPre.numParams(); assertEquals(2 * (nIn * nOut + nOut) + nIn, actualNP); - params = aeNoPre.params(); + params = aeNoPre.getModelParams(); assertEquals(params.length(), actualNP); paramTable = aePre.getParamTable(); assertTrue(paramTable.containsKey("0_vb")); @@ -865,14 +865,14 @@ public class MultiLayerTest extends BaseDL4JTest { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - BaseLayer bl0 = (BaseLayer) net2.getLayer(0).getLayerConfiguration(); + BaseLayerConfiguration bl0 = (BaseLayerConfiguration) net2.getLayer(0).getLayerConfiguration(); assertEquals(0.1, TestUtils.getL1(bl0.getRegularizationBias()), 1e-6); assertEquals(0.2, TestUtils.getL2(bl0.getRegularizationBias()), 1e-6); INDArray features = Nd4j.rand(10, 10); INDArray labels = Nd4j.rand(10, 10); - net2.setParams(net1.params().dup()); + net2.setParams(net1.getModelParams().dup()); net1.setInput(features); net1.setLabels(labels); @@ -888,15 +888,15 @@ public class MultiLayerTest extends BaseDL4JTest { r = net2.calcRegularizationScore(true); assertEquals(0.0, r, 0.0); - double s1 = net1.score(); - double s2 = net2.score(); + double s1 = net1.getScore(); + double s2 = net2.getScore(); assertEquals(s1, s2, 1e-6); //Biases initialized to 0 -> should initially have same score for (int i = 0; i < 10; i++) { net1.fit(features, labels); } - net2.setParams(net1.params().dup()); + net2.setParams(net1.getModelParams().dup()); net1.computeGradientAndScore(); net2.computeGradientAndScore(); @@ 
-906,8 +906,8 @@ public class MultiLayerTest extends BaseDL4JTest { r = net2.calcRegularizationScore(true); assertTrue(r > 0.0); - s1 = net1.score(); - s2 = net2.score(); + s1 = net1.getScore(); + s2 = net2.getScore(); assertNotEquals(s1, s2, 1e-6); //Scores should differ due to bias l1/l2 @@ -1022,11 +1022,11 @@ public class MultiLayerTest extends BaseDL4JTest { MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); - assertNotEquals(net1.params(), net2.params()); + assertNotEquals(net1.getModelParams(), net2.getModelParams()); assertNotEquals(net1.getParamTable(), net2.getParamTable()); net1.setParamTable(net2.getParamTable()); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); assertEquals(net1.getParamTable(), net2.getParamTable()); } @@ -1412,7 +1412,7 @@ public class MultiLayerTest extends BaseDL4JTest { exp.add(MultiLayerNetwork.class); CheckModelsListener listener = new CheckModelsListener(); - net.setListeners(listener); + net.addTrainingListeners(listener); INDArray f = Nd4j.create(1, 10); INDArray l = Nd4j.create(1, 10); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java index 99c1c6077..29d7e7a6a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java @@ -753,9 +753,9 @@ public class MultiLayerTestRNN extends BaseDL4JTest { DataSet ds = new DataSet(features, labels, maskArrayInput, maskArrayOutput); - INDArray initialParams = mln.params().dup(); + INDArray initialParams = mln.getModelParams().dup(); mln.fit(ds); - INDArray afterParams = mln.params(); + INDArray afterParams = mln.getModelParams(); assertNotEquals(initialParams, afterParams); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java index 1cca6ede8..d98cd58b2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java @@ -172,7 +172,7 @@ public class TestMasking extends BaseDL4JTest { net.setLabels(labels); net.computeGradientAndScore(); - double score1 = net.score(); + double score1 = net.getScore(); INDArray grad1 = net.gradient().gradient(); //Now: change the label values for the masked steps. 
The @@ -187,7 +187,7 @@ public class TestMasking extends BaseDL4JTest { assertNotEquals(labels, newLabels); - double score2 = net.score(); + double score2 = net.getScore(); INDArray grad2 = net.gradient().gradient(); assertEquals(score1, score2, 1e-6); @@ -214,7 +214,7 @@ public class TestMasking extends BaseDL4JTest { graph.setLabels(labels); graph.computeGradientAndScore(); - double gScore1 = graph.score(); + double gScore1 = graph.getScore(); INDArray gGrad1 = graph.gradient().gradient(); graph.setLayerMaskArrays(null, new INDArray[] {labelMask}); @@ -222,7 +222,7 @@ public class TestMasking extends BaseDL4JTest { graph.setLabels(newLabels); graph.computeGradientAndScore(); - double gScore2 = graph.score(); + double gScore2 = graph.getScore(); INDArray gGrad2 = graph.gradient().gradient(); assertEquals(gScore1, gScore2, 1e-6); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java index 7b75bc97b..9c3c1407b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java @@ -53,12 +53,12 @@ public class TestSetGetParameters extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - INDArray initParams = net.params().dup(); + INDArray initParams = net.getModelParams().dup(); Map initParams2 = net.getParamTable(); - net.setParams(net.params()); + net.setParams(net.getModelParams()); - INDArray initParamsAfter = net.params(); + INDArray initParamsAfter = net.getModelParams(); Map initParams2After = net.getParamTable(); for (String s : initParams2.keySet()) { @@ -71,7 +71,7 @@ public class TestSetGetParameters extends BaseDL4JTest { INDArray randomParams = Nd4j.rand(initParams.dataType(), initParams.shape()); net.setParams(randomParams.dup()); - assertEquals(net.params(), randomParams); + assertEquals(net.getModelParams(), randomParams); } @Test @@ -90,12 +90,12 @@ public class TestSetGetParameters extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - INDArray initParams = net.params().dup(); + INDArray initParams = net.getModelParams().dup(); Map initParams2 = net.getParamTable(); - net.setParams(net.params()); + net.setParams(net.getModelParams()); - INDArray initParamsAfter = net.params(); + INDArray initParamsAfter = net.getModelParams(); Map initParams2After = net.getParamTable(); for (String s : initParams2.keySet()) { @@ -108,7 +108,7 @@ public class TestSetGetParameters extends BaseDL4JTest { INDArray randomParams = Nd4j.rand(initParams.dataType(), initParams.shape()); net.setParams(randomParams.dup()); - assertEquals(net.params(), randomParams); + assertEquals(net.getModelParams(), randomParams); } @Test @@ -128,7 +128,7 @@ public class TestSetGetParameters extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - INDArray params = net.params(); + INDArray params = net.getModelParams(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf); @@ -137,11 +137,11 @@ public class TestSetGetParameters extends BaseDL4JTest { MultiLayerNetwork net3 = new MultiLayerNetwork(conf); net3.init(params, false); - assertEquals(params, net2.params()); - assertEquals(params, net3.params()); + assertEquals(params, net2.getModelParams()); + assertEquals(params, net3.getModelParams()); - 
assertNotSame(params, net2.params()); //Different objects due to clone - assertSame(params, net3.params()); //Same object due to clone + assertNotSame(params, net2.getModelParams()); //Different objects due to clone + assertSame(params, net3.getModelParams()); //Same object due to clone Map paramsMap = net.getParamTable(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java index 7dc7480c6..6f3747e84 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java @@ -103,14 +103,14 @@ public class TestVariableLengthTS extends BaseDL4JTest { net.setInput(in1); net.setLabels(labels1); net.computeGradientAndScore(); - double score1 = net.score(); + double score1 = net.getScore(); Gradient g1 = net.gradient(); net.setInput(in2); net.setLabels(labels2); net.setLayerMaskArrays(null, labelMask); net.computeGradientAndScore(); - double score2 = net.score(); + double score2 = net.getScore(); Gradient g2 = net.gradient(); //Scores and gradients should be identical for two cases (given mask array) @@ -134,7 +134,7 @@ public class TestVariableLengthTS extends BaseDL4JTest { } net.setLabels(labels2); net.computeGradientAndScore(); - double score2a = net.score(); + double score2a = net.getScore(); Gradient g2a = net.gradient(); assertEquals(score2, score2a, 1e-6); for (String s : g2map.keySet()) { @@ -196,7 +196,7 @@ public class TestVariableLengthTS extends BaseDL4JTest { net.setInput(in1); net.setLabels(labels1); net.computeGradientAndScore(); - double score1 = net.score(); + double score1 = net.getScore(); Gradient g1 = net.gradient(); Map map1 = g1.gradientForVariable(); for (String s : map1.keySet()) { @@ -207,7 +207,7 @@ public class TestVariableLengthTS extends BaseDL4JTest { net.setLabels(labels2); net.setLayerMaskArrays(inputMask, null); net.computeGradientAndScore(); - double score2 = net.score(); + double score2 = net.getScore(); Gradient g2 = net.gradient(); net.setInput(in2); @@ -240,7 +240,7 @@ public class TestVariableLengthTS extends BaseDL4JTest { net.setInput(in2); net.setLayerMaskArrays(inputMask, null); net.computeGradientAndScore(); - double score2a = net.score(); + double score2a = net.getScore(); Gradient g2a = net.gradient(); assertEquals(score2, score2a, 1e-12); for (String s : g2.gradientForVariable().keySet()) { @@ -327,7 +327,7 @@ public class TestVariableLengthTS extends BaseDL4JTest { mln.setLabels(labels); mln.computeGradientAndScore(); - double score = mln.score(); + double score = mln.getScore(); assertEquals(expScore, score, 0.1, msg); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java index 19360abb7..0539c6262 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java @@ -77,7 +77,7 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { MultiLayerNetwork net2GradUpd = new MultiLayerNetwork(conf.clone()); net2GradUpd.init(); - assertEquals(net1GradCalc.params(), net2GradUpd.params()); + 
assertEquals(net1GradCalc.getModelParams(), net2GradUpd.getModelParams()); INDArray f = Nd4j.rand(minibatch, nIn); INDArray l = Nd4j.create(minibatch, nOut); @@ -109,17 +109,17 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { //Also: if we apply the gradient using a subi op, we should get the same final params as if we did a fit op // on the original network - net2GradUpd.params().subi(g.gradient()); + net2GradUpd.getModelParams().subi(g.gradient()); net1GradCalc.fit(f, l); - assertEquals(net1GradCalc.params(), net2GradUpd.params()); + assertEquals(net1GradCalc.getModelParams(), net2GradUpd.getModelParams()); //============================= if (!(u instanceof Sgd)) { net2GradUpd.getUpdater().getStateViewArray().assign(net1GradCalc.getUpdater().getStateViewArray()); } - assertEquals(net1GradCalc.params(), net2GradUpd.params()); + assertEquals(net1GradCalc.getModelParams(), net2GradUpd.getModelParams()); assertEquals(net1GradCalc.getUpdater().getStateViewArray(), net2GradUpd.getUpdater().getStateViewArray()); @@ -130,7 +130,7 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { for (int i = 0; i < 100; i++) { net1GradCalc.fit(f, l); net2GradUpd.fit(f, l); - assertEquals(net1GradCalc.params(), net2GradUpd.params()); + assertEquals(net1GradCalc.getModelParams(), net2GradUpd.getModelParams()); } } } @@ -169,7 +169,7 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { ComputationGraph net2GradUpd = new ComputationGraph(conf.clone()); net2GradUpd.init(); - assertEquals(net1GradCalc.params(), net2GradUpd.params()); + assertEquals(net1GradCalc.getModelParams(), net2GradUpd.getModelParams()); INDArray f = Nd4j.rand(minibatch, nIn); INDArray l = Nd4j.create(minibatch, nOut); @@ -201,16 +201,16 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { //Also: if we apply the gradient using a subi op, we should get the same final params as if we did a fit op // on the original network - net2GradUpd.params().subi(g.gradient()); + net2GradUpd.getModelParams().subi(g.gradient()); net1GradCalc.fit(new INDArray[] {f}, new INDArray[] {l}); - assertEquals(net1GradCalc.params(), net2GradUpd.params()); + assertEquals(net1GradCalc.getModelParams(), net2GradUpd.getModelParams()); //============================= if (!(u instanceof Sgd)) { net2GradUpd.getUpdater().getStateViewArray().assign(net1GradCalc.getUpdater().getStateViewArray()); } - assertEquals(net1GradCalc.params(), net2GradUpd.params()); + assertEquals(net1GradCalc.getModelParams(), net2GradUpd.getModelParams()); assertEquals(net1GradCalc.getUpdater().getStateViewArray(), net2GradUpd.getUpdater().getStateViewArray()); @@ -222,7 +222,7 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { for (int i = 0; i < 100; i++) { net1GradCalc.fit(new INDArray[] {f}, new INDArray[] {l}); net2GradUpd.fit(new INDArray[] {f}, new INDArray[] {l}); - assertEquals(net1GradCalc.params(), net2GradUpd.params()); + assertEquals(net1GradCalc.getModelParams(), net2GradUpd.getModelParams()); } } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java index 195ee2f6d..a8bfd8d97 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java @@ -25,7 +25,6 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.constraint.UnitNormConstraint; import org.deeplearning4j.nn.conf.distribution.ConstantDistribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -94,7 +93,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { ComputationGraph modelToFineTune = new ComputationGraph(expectedConf); modelToFineTune.init(); - modelToFineTune.setParams(expectedModel.params()); + modelToFineTune.setParams(expectedModel.getModelParams()); //model after applying changes with transfer learning ComputationGraph modelNow = new TransferLearning.GraphBuilder(modelToFineTune) @@ -108,8 +107,8 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { //Check params after fit modelNow.fit(randomData); expectedModel.fit(randomData); - assertEquals(modelNow.score(), expectedModel.score(), 1e-8); - assertEquals(modelNow.params(), expectedModel.params()); + assertEquals(modelNow.getScore(), expectedModel.getScore(), 1e-8); + assertEquals(modelNow.getModelParams(), expectedModel.getModelParams()); } @Test @@ -139,9 +138,9 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { //.setOutputs("layer3") .build(); - BaseLayer bl0 = ((BaseLayer) modelNow.getLayer("layer0").getLayerConfiguration()); - BaseLayer bl1 = ((BaseLayer) modelNow.getLayer("layer1").getLayerConfiguration()); - BaseLayer bl3 = ((BaseLayer) modelNow.getLayer("layer3").getLayerConfiguration()); + BaseLayerConfiguration bl0 = ((BaseLayerConfiguration) modelNow.getLayer("layer0").getLayerConfiguration()); + BaseLayerConfiguration bl1 = ((BaseLayerConfiguration) modelNow.getLayer("layer1").getLayerConfiguration()); + BaseLayerConfiguration bl3 = ((BaseLayerConfiguration) modelNow.getLayer("layer3").getLayerConfiguration()); assertEquals(bl0.getWeightInitFn(), new WeightInitDistribution(new NormalDistribution(1, 1e-1))); assertEquals(bl1.getWeightInitFn(), new WeightInitXavier()); assertEquals(bl1.getWeightInitFn(), new WeightInitXavier()); @@ -161,22 +160,22 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { modelExpectedArch.init(); //modelNow should have the same architecture as modelExpectedArch - assertArrayEquals(modelExpectedArch.params().shape(), modelNow.params().shape()); - assertArrayEquals(modelExpectedArch.getLayer("layer0").params().shape(), - modelNow.getLayer("layer0").params().shape()); - assertArrayEquals(modelExpectedArch.getLayer("layer1").params().shape(), - modelNow.getLayer("layer1").params().shape()); - assertArrayEquals(modelExpectedArch.getLayer("layer2").params().shape(), - modelNow.getLayer("layer2").params().shape()); - assertArrayEquals(modelExpectedArch.getLayer("layer3").params().shape(), - modelNow.getLayer("layer3").params().shape()); + assertArrayEquals(modelExpectedArch.getModelParams().shape(), modelNow.getModelParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer("layer0").getParams().shape(), + modelNow.getLayer("layer0").getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer("layer1").getParams().shape(), + modelNow.getLayer("layer1").getParams().shape()); 
+ assertArrayEquals(modelExpectedArch.getLayer("layer2").getParams().shape(), + modelNow.getLayer("layer2").getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer("layer3").getParams().shape(), + modelNow.getLayer("layer3").getParams().shape()); - modelNow.setParams(modelExpectedArch.params()); + modelNow.setParams(modelExpectedArch.getModelParams()); //fit should give the same results modelExpectedArch.fit(randomData); modelNow.fit(randomData); - assertEquals(modelExpectedArch.score(), modelNow.score(), 1e-8); - assertEquals(modelExpectedArch.params(), modelNow.params()); + assertEquals(modelExpectedArch.getScore(), modelNow.getScore(), 1e-8); + assertEquals(modelExpectedArch.getModelParams(), modelNow.getModelParams()); } @Test @@ -227,22 +226,22 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { modelExpectedArch.init(); //modelNow should have the same architecture as modelExpectedArch - assertArrayEquals(modelExpectedArch.params().shape(), modelNow.params().shape()); - assertArrayEquals(modelExpectedArch.getLayer("layer0").params().shape(), - modelNow.getLayer("layer0").params().shape()); - assertArrayEquals(modelExpectedArch.getLayer("layer1").params().shape(), - modelNow.getLayer("layer1").params().shape()); - assertArrayEquals(modelExpectedArch.getLayer("layer2").params().shape(), - modelNow.getLayer("layer2").params().shape()); - assertArrayEquals(modelExpectedArch.getLayer("layer3").params().shape(), - modelNow.getLayer("layer3").params().shape()); + assertArrayEquals(modelExpectedArch.getModelParams().shape(), modelNow.getModelParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer("layer0").getParams().shape(), + modelNow.getLayer("layer0").getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer("layer1").getParams().shape(), + modelNow.getLayer("layer1").getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer("layer2").getParams().shape(), + modelNow.getLayer("layer2").getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer("layer3").getParams().shape(), + modelNow.getLayer("layer3").getParams().shape()); - modelNow.setParams(modelExpectedArch.params()); + modelNow.setParams(modelExpectedArch.getModelParams()); //fit should give the same results modelExpectedArch.fit(randomData); modelNow.fit(randomData); - assertEquals(modelExpectedArch.score(), modelNow.score(), 1e-8); - assertEquals(modelExpectedArch.params(), modelNow.params()); + assertEquals(modelExpectedArch.getScore(), modelNow.getScore(), 1e-8); + assertEquals(modelExpectedArch.getModelParams(), modelNow.getModelParams()); } @Test @@ -385,14 +384,14 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { assertEquals(modelExpectedArch.getComputationGraphConfiguration().toJson(), modelNow.getComputationGraphConfiguration().toJson()); - modelNow.setParams(modelExpectedArch.params()); + modelNow.setParams(modelExpectedArch.getModelParams()); int i = 0; while (i < 5) { modelExpectedArch.fit(randomData); modelNow.fit(randomData); i++; } - assertEquals(modelExpectedArch.params(), modelNow.params()); + assertEquals(modelExpectedArch.getModelParams(), modelNow.getModelParams()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java index ba201c62a..cee6e2f90 100644 --- 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java @@ -26,10 +26,9 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.graph.MergeVertex; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -99,7 +98,7 @@ public class TransferLearningComplex extends BaseDL4JTest { } //Also check config: - BaseLayer bl = ((BaseLayer) l.getLayerConfiguration()); + BaseLayerConfiguration bl = ((BaseLayerConfiguration) l.getLayerConfiguration()); assertEquals(new Adam(2e-2), bl.getIUpdater()); assertEquals(Activation.LEAKYRELU.getActivationFunction(), bl.getActivationFn()); } @@ -154,8 +153,8 @@ public class TransferLearningComplex extends BaseDL4JTest { .setOutputs("outRight").build(); ComputationGraph modelOther = new ComputationGraph(otherConf); modelOther.init(); - modelOther.getLayer("denseRight0").setParams(modelToTune.getLayer("denseRight0").params()); - modelOther.getLayer("outRight").setParams(modelToTune.getLayer("outRight").params()); + modelOther.getLayer("denseRight0").setParams(modelToTune.getLayer("denseRight0").getParams()); + modelOther.getLayer("outRight").setParams(modelToTune.getLayer("outRight").getParams()); modelToTune.getVertex("denseCentre0").setLayerAsFrozen(); ComputationGraph modelNow = @@ -179,11 +178,11 @@ public class TransferLearningComplex extends BaseDL4JTest { assertEquals(otherRandData.getFeatures(0), modelToTune.feedForward(randData.getFeatures(), false).get("denseCentre0")); - assertEquals(modelOther.getLayer("denseRight0").params(), modelNow.getLayer("denseRight0").params()); - assertEquals(modelOther.getLayer("denseRight0").params(), modelToTune.getLayer("denseRight0").params()); + assertEquals(modelOther.getLayer("denseRight0").getParams(), modelNow.getLayer("denseRight0").getParams()); + assertEquals(modelOther.getLayer("denseRight0").getParams(), modelToTune.getLayer("denseRight0").getParams()); - assertEquals(modelOther.getLayer("outRight").params(), modelNow.getLayer("outRight").params()); - assertEquals(modelOther.getLayer("outRight").params(), modelToTune.getLayer("outRight").params()); + assertEquals(modelOther.getLayer("outRight").getParams(), modelNow.getLayer("outRight").getParams()); + assertEquals(modelOther.getLayer("outRight").getParams(), modelToTune.getLayer("outRight").getParams()); n++; } @@ -237,11 +236,11 @@ public class TransferLearningComplex extends BaseDL4JTest { assertEquals(otherRandData.getFeatures(0), modelToTune.feedForward(randData.getFeatures(), false).get("denseCentre0")); - assertEquals(modelToTune.getLayer("denseRight0").params(), modelNow.getLayer("denseRight0").params()); + assertEquals(modelToTune.getLayer("denseRight0").getParams(), modelNow.getLayer("denseRight0").getParams()); - assertEquals(modelToTune.getLayer("outRight").params(), modelNow.getLayer("outRight").params()); + 
assertEquals(modelToTune.getLayer("outRight").getParams(), modelNow.getLayer("outRight").getParams()); - assertEquals(modelToTune.getLayer("outCentre").params(), modelNow.getLayer("outCentre").params()); + assertEquals(modelToTune.getLayer("outCentre").getParams(), modelNow.getLayer("outCentre").getParams()); n++; } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java index f606e6402..48963619b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java @@ -178,25 +178,25 @@ public class TransferLearningHelperTest extends BaseDL4JTest { TransferLearningHelper helper = new TransferLearningHelper(modelToTune, "denseCentre2"); MultiDataSet featurizedDataSet = helper.featurize(origData); - assertEquals(modelIdentical.getLayer("denseRight0").params(), modelToTune.getLayer("denseRight0").params()); + assertEquals(modelIdentical.getLayer("denseRight0").getParams(), modelToTune.getLayer("denseRight0").getParams()); modelIdentical.fit(origData); helper.fitFeaturized(featurizedDataSet); - assertEquals(modelIdentical.getLayer("denseCentre0").params(), modelToTune.getLayer("denseCentre0").params()); - assertEquals(modelIdentical.getLayer("denseCentre1").params(), modelToTune.getLayer("denseCentre1").params()); - assertEquals(modelIdentical.getLayer("denseCentre2").params(), modelToTune.getLayer("denseCentre2").params()); - assertEquals(modelIdentical.getLayer("denseCentre3").params(), modelToTune.getLayer("denseCentre3").params()); - assertEquals(modelIdentical.getLayer("outCentre").params(), modelToTune.getLayer("outCentre").params()); + assertEquals(modelIdentical.getLayer("denseCentre0").getParams(), modelToTune.getLayer("denseCentre0").getParams()); + assertEquals(modelIdentical.getLayer("denseCentre1").getParams(), modelToTune.getLayer("denseCentre1").getParams()); + assertEquals(modelIdentical.getLayer("denseCentre2").getParams(), modelToTune.getLayer("denseCentre2").getParams()); + assertEquals(modelIdentical.getLayer("denseCentre3").getParams(), modelToTune.getLayer("denseCentre3").getParams()); + assertEquals(modelIdentical.getLayer("outCentre").getParams(), modelToTune.getLayer("outCentre").getParams()); assertEquals(modelIdentical.getLayer("denseRight").getNetConfiguration().toJson(), modelToTune.getLayer("denseRight").getNetConfiguration().toJson()); - assertEquals(modelIdentical.getLayer("denseRight").params(), modelToTune.getLayer("denseRight").params()); + assertEquals(modelIdentical.getLayer("denseRight").getParams(), modelToTune.getLayer("denseRight").getParams()); assertEquals(modelIdentical.getLayer("denseRight0").getNetConfiguration().toJson(), modelToTune.getLayer("denseRight0").getNetConfiguration().toJson()); //assertEquals(modelIdentical.getLayer("denseRight0").params(),modelToTune.getLayer("denseRight0").params()); - assertEquals(modelIdentical.getLayer("denseRight1").params(), modelToTune.getLayer("denseRight1").params()); - assertEquals(modelIdentical.getLayer("outRight").params(), modelToTune.getLayer("outRight").params()); - assertEquals(modelIdentical.getLayer("denseLeft0").params(), modelToTune.getLayer("denseLeft0").params()); - assertEquals(modelIdentical.getLayer("outLeft").params(), 
modelToTune.getLayer("outLeft").params()); + assertEquals(modelIdentical.getLayer("denseRight1").getParams(), modelToTune.getLayer("denseRight1").getParams()); + assertEquals(modelIdentical.getLayer("outRight").getParams(), modelToTune.getLayer("outRight").getParams()); + assertEquals(modelIdentical.getLayer("denseLeft0").getParams(), modelToTune.getLayer("denseLeft0").getParams()); + assertEquals(modelIdentical.getLayer("outLeft").getParams(), modelToTune.getLayer("outLeft").getParams()); // log.info(modelIdentical.summary()); // log.info(helper.unfrozenGraph().summary()); @@ -230,7 +230,7 @@ public class TransferLearningHelperTest extends BaseDL4JTest { TransferLearningHelper helper = new TransferLearningHelper(modelToFineTune, 1); INDArray paramsLastTwoLayers = - Nd4j.hstack(modelToFineTune.getLayer(2).params(), modelToFineTune.getLayer(3).params()); + Nd4j.hstack(modelToFineTune.getLayer(2).getParams(), modelToFineTune.getLayer(3).getParams()); MultiLayerNetwork notFrozen = new MultiLayerNetwork( (NeuralNetConfiguration) overallConf.clone().list() .layer(0, new Builder().nIn(2).nOut(3).build()) @@ -248,9 +248,9 @@ public class TransferLearningHelperTest extends BaseDL4JTest { modelNow.fit(randomData); } - INDArray expected = Nd4j.hstack(modelToFineTune.getLayer(0).params(), modelToFineTune.getLayer(1).params(), - notFrozen.params()); - INDArray act = modelNow.params(); + INDArray expected = Nd4j.hstack(modelToFineTune.getLayer(0).getParams(), modelToFineTune.getLayer(1).getParams(), + notFrozen.getModelParams()); + INDArray act = modelNow.getModelParams(); assertEquals(expected, act); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java index f33c48738..88e8d5d01 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java @@ -91,7 +91,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .build(); for (org.deeplearning4j.nn.api.Layer l : modelNow.getLayers()) { - BaseLayer bl = ((BaseLayer) l.getLayerConfiguration()); + BaseLayerConfiguration bl = ((BaseLayerConfiguration) l.getLayerConfiguration()); assertEquals(new RmsProp(0.5), bl.getIUpdater()); } @@ -107,9 +107,9 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .build()) .build()); expectedModel.init(); - expectedModel.setParams(modelToFineTune.params().dup()); + expectedModel.setParams(modelToFineTune.getModelParams().dup()); - assertEquals(expectedModel.params(), modelNow.params()); + assertEquals(expectedModel.getModelParams(), modelNow.getModelParams()); //Check json NeuralNetConfiguration expectedConf = expectedModel.getNetConfiguration(); @@ -119,9 +119,9 @@ public class TransferLearningMLNTest extends BaseDL4JTest { modelNow.fit(randomData); expectedModel.fit(randomData); - assertEquals(modelNow.score(), expectedModel.score(), 1e-6); - INDArray pExp = expectedModel.params(); - INDArray pNow = modelNow.params(); + assertEquals(modelNow.getScore(), expectedModel.getScore(), 1e-6); + INDArray pExp = expectedModel.getModelParams(); + INDArray pNow = modelNow.getModelParams(); assertEquals(pExp, pNow); } @@ -160,9 +160,9 @@ public class TransferLearningMLNTest extends BaseDL4JTest { //Will fail - expected because of dist and weight init changes 
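The transfer-learning hunks around here retype the per-layer configuration casts from BaseLayer to BaseLayerConfiguration before inspecting the updater and weight initialisation. A short sketch of that inspection loop, assuming the BaseLayerConfiguration accessors used in this patch (getLayerConfiguration(), getIUpdater()); the helper name and the IUpdater import path are assumptions, as the diff does not show them:

    import org.deeplearning4j.nn.api.Layer;
    import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.nd4j.linalg.learning.config.IUpdater;

    import static org.junit.jupiter.api.Assertions.assertEquals;

    final class LayerConfigChecks {

        // Verifies that every layer of a fine-tuned network carries the expected updater,
        // mirroring the loop over modelNow.getLayers() in these tests.
        static void assertUpdater(MultiLayerNetwork net, IUpdater expected) {
            for (Layer l : net.getLayers()) {
                BaseLayerConfiguration blc = (BaseLayerConfiguration) l.getLayerConfiguration();
                assertEquals(expected, blc.getIUpdater());
            }
        }
    }

Called as, for example, assertUpdater(modelNow, new RmsProp(0.5)), this reproduces the check in the hunk directly above.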
//assertEquals(modelExpectedArch.getConfiguration().toJson(), modelNow.getConfiguration().toJson()); - BaseLayer bl0 = ((BaseLayer) modelNow.getNetConfiguration().getConf(0).getLayer()); - BaseLayer bl1 = ((BaseLayer) modelNow.getNetConfiguration().getConf(1).getLayer()); - BaseLayer bl3 = ((BaseLayer) modelNow.getNetConfiguration().getConf(3).getLayer()); + BaseLayerConfiguration bl0 = ((BaseLayerConfiguration) modelNow.getNetConfiguration().getConf(0).getLayer()); + BaseLayerConfiguration bl1 = ((BaseLayerConfiguration) modelNow.getNetConfiguration().getConf(1).getLayer()); + BaseLayerConfiguration bl3 = ((BaseLayerConfiguration) modelNow.getNetConfiguration().getConf(3).getLayer()); assertEquals(bl0.getWeightInitFn().getClass(), WeightInitXavier.class); try { assertEquals(JsonMappers.getMapper().writeValueAsString(bl1.getWeightInitFn()), @@ -173,18 +173,18 @@ public class TransferLearningMLNTest extends BaseDL4JTest { assertEquals(bl3.getWeightInitFn(), new WeightInitXavier()); //modelNow should have the same architecture as modelExpectedArch - assertArrayEquals(modelExpectedArch.params().shape(), modelNow.params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(0).params().shape(), modelNow.getLayer(0).params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(1).params().shape(), modelNow.getLayer(1).params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(2).params().shape(), modelNow.getLayer(2).params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(3).params().shape(), modelNow.getLayer(3).params().shape()); + assertArrayEquals(modelExpectedArch.getModelParams().shape(), modelNow.getModelParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer(0).getParams().shape(), modelNow.getLayer(0).getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer(1).getParams().shape(), modelNow.getLayer(1).getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer(2).getParams().shape(), modelNow.getLayer(2).getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer(3).getParams().shape(), modelNow.getLayer(3).getParams().shape()); - modelNow.setParams(modelExpectedArch.params()); + modelNow.setParams(modelExpectedArch.getModelParams()); //fit should give the same results modelExpectedArch.fit(randomData); modelNow.fit(randomData); - assertEquals(modelExpectedArch.score(), modelNow.score(), 0.000001); - assertEquals(modelExpectedArch.params(), modelNow.params()); + assertEquals(modelExpectedArch.getScore(), modelNow.getScore(), 0.000001); + assertEquals(modelExpectedArch.getModelParams(), modelNow.getModelParams()); } @@ -227,20 +227,20 @@ public class TransferLearningMLNTest extends BaseDL4JTest { modelExpectedArch.init(); //modelNow should have the same architecture as modelExpectedArch - assertArrayEquals(modelExpectedArch.params().shape(), modelNow.params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(0).params().shape(), modelNow.getLayer(0).params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(1).params().shape(), modelNow.getLayer(1).params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(2).params().shape(), modelNow.getLayer(2).params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(3).params().shape(), modelNow.getLayer(3).params().shape()); + assertArrayEquals(modelExpectedArch.getModelParams().shape(), modelNow.getModelParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer(0).getParams().shape(), modelNow.getLayer(0).getParams().shape()); + 
assertArrayEquals(modelExpectedArch.getLayer(1).getParams().shape(), modelNow.getLayer(1).getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer(2).getParams().shape(), modelNow.getLayer(2).getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer(3).getParams().shape(), modelNow.getLayer(3).getParams().shape()); - modelNow.setParams(modelExpectedArch.params()); + modelNow.setParams(modelExpectedArch.getModelParams()); //fit should give the same results modelExpectedArch.fit(randomData); modelNow.fit(randomData); - double scoreExpected = modelExpectedArch.score(); - double scoreActual = modelNow.score(); + double scoreExpected = modelExpectedArch.getScore(); + double scoreActual = modelNow.getScore(); assertEquals(scoreExpected, scoreActual, 1e-4); - assertEquals(modelExpectedArch.params(), modelNow.params()); + assertEquals(modelExpectedArch.getModelParams(), modelNow.getModelParams()); } @Test @@ -370,14 +370,14 @@ public class TransferLearningMLNTest extends BaseDL4JTest { assertEquals(modelExpectedArch.getNetConfiguration().getConf(5).toJson(), modelNow.getNetConfiguration().getConf(5).toJson()); - assertArrayEquals(modelExpectedArch.params().shape(), modelNow.params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(0).params().shape(), modelNow.getLayer(0).params().shape()); + assertArrayEquals(modelExpectedArch.getModelParams().shape(), modelNow.getModelParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer(0).getParams().shape(), modelNow.getLayer(0).getParams().shape()); //subsampling has no params //assertArrayEquals(modelExpectedArch.getLayer(1).params().shape(), modelNow.getLayer(1).params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(2).params().shape(), modelNow.getLayer(2).params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(3).params().shape(), modelNow.getLayer(3).params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(4).params().shape(), modelNow.getLayer(4).params().shape()); - assertArrayEquals(modelExpectedArch.getLayer(5).params().shape(), modelNow.getLayer(5).params().shape()); + assertArrayEquals(modelExpectedArch.getLayer(2).getParams().shape(), modelNow.getLayer(2).getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer(3).getParams().shape(), modelNow.getLayer(3).getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer(4).getParams().shape(), modelNow.getLayer(4).getParams().shape()); + assertArrayEquals(modelExpectedArch.getLayer(5).getParams().shape(), modelNow.getLayer(5).getParams().shape()); } @@ -449,23 +449,23 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .inputType(InputType.convolutionalFlat(12, 12, 20)).build()); notFrozen.init(); - assertArrayEquals(modelToFineTune.getLayer(0).params().shape(), modelNow.getLayer(0).params().shape()); + assertArrayEquals(modelToFineTune.getLayer(0).getParams().shape(), modelNow.getLayer(0).getParams().shape()); //subsampling has no params //assertArrayEquals(modelExpectedArch.getLayer(1).params().shape(), modelNow.getLayer(1).params().shape()); - assertArrayEquals(notFrozen.getLayer(0).params().shape(), modelNow.getLayer(2).params().shape()); - modelNow.getLayer(2).setParams(notFrozen.getLayer(0).params()); + assertArrayEquals(notFrozen.getLayer(0).getParams().shape(), modelNow.getLayer(2).getParams().shape()); + modelNow.getLayer(2).setParams(notFrozen.getLayer(0).getParams()); //subsampling has no params //assertArrayEquals(notFrozen.getLayer(1).params().shape(), 
modelNow.getLayer(3).params().shape()); - assertArrayEquals(notFrozen.getLayer(2).params().shape(), modelNow.getLayer(4).params().shape()); - modelNow.getLayer(4).setParams(notFrozen.getLayer(2).params()); - assertArrayEquals(notFrozen.getLayer(3).params().shape(), modelNow.getLayer(5).params().shape()); - modelNow.getLayer(5).setParams(notFrozen.getLayer(3).params()); - assertArrayEquals(notFrozen.getLayer(4).params().shape(), modelNow.getLayer(6).params().shape()); - modelNow.getLayer(6).setParams(notFrozen.getLayer(4).params()); - assertArrayEquals(notFrozen.getLayer(5).params().shape(), modelNow.getLayer(7).params().shape()); - modelNow.getLayer(7).setParams(notFrozen.getLayer(5).params()); - assertArrayEquals(notFrozen.getLayer(6).params().shape(), modelNow.getLayer(8).params().shape()); - modelNow.getLayer(8).setParams(notFrozen.getLayer(6).params()); + assertArrayEquals(notFrozen.getLayer(2).getParams().shape(), modelNow.getLayer(4).getParams().shape()); + modelNow.getLayer(4).setParams(notFrozen.getLayer(2).getParams()); + assertArrayEquals(notFrozen.getLayer(3).getParams().shape(), modelNow.getLayer(5).getParams().shape()); + modelNow.getLayer(5).setParams(notFrozen.getLayer(3).getParams()); + assertArrayEquals(notFrozen.getLayer(4).getParams().shape(), modelNow.getLayer(6).getParams().shape()); + modelNow.getLayer(6).setParams(notFrozen.getLayer(4).getParams()); + assertArrayEquals(notFrozen.getLayer(5).getParams().shape(), modelNow.getLayer(7).getParams().shape()); + modelNow.getLayer(7).setParams(notFrozen.getLayer(5).getParams()); + assertArrayEquals(notFrozen.getLayer(6).getParams().shape(), modelNow.getLayer(8).getParams().shape()); + modelNow.getLayer(8).setParams(notFrozen.getLayer(6).getParams()); int i = 0; while (i < 3) { @@ -474,8 +474,8 @@ public class TransferLearningMLNTest extends BaseDL4JTest { i++; } - INDArray expectedParams = Nd4j.hstack(modelToFineTune.getLayer(0).params(), notFrozen.params()); - assertEquals(expectedParams, modelNow.params()); + INDArray expectedParams = Nd4j.hstack(modelToFineTune.getLayer(0).getParams(), notFrozen.getModelParams()); + assertEquals(expectedParams, modelNow.getModelParams()); } @@ -503,13 +503,13 @@ public class TransferLearningMLNTest extends BaseDL4JTest { //Check original net isn't modified: - BaseLayer l0 = (BaseLayer) net.getLayer(0).getLayerConfiguration(); + BaseLayerConfiguration l0 = (BaseLayerConfiguration) net.getLayer(0).getLayerConfiguration(); assertEquals(new Adam(1e-4), l0.getIUpdater()); assertEquals(Activation.TANH.getActivationFunction(), l0.getActivationFn()); assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); assertEquals(0.1, TestUtils.getL1(l0), 1e-6); - BaseLayer l1 = (BaseLayer) net.getLayer(1).getLayerConfiguration(); + BaseLayerConfiguration l1 = (BaseLayerConfiguration) net.getLayer(1).getLayerConfiguration(); assertEquals(new Adam(1e-4), l1.getIUpdater()); assertEquals(Activation.HARDSIGMOID.getActivationFunction(), l1.getActivationFn()); assertEquals(new WeightInitRelu(), l1.getWeightInitFn()); @@ -518,13 +518,13 @@ public class TransferLearningMLNTest extends BaseDL4JTest { assertEquals(BackpropType.Standard, conf.getBackpropType()); //Check new net has only the appropriate things modified (i.e., LR) - l0 = (BaseLayer) net2.getLayer(0).getLayerConfiguration(); + l0 = (BaseLayerConfiguration) net2.getLayer(0).getLayerConfiguration(); assertEquals(new Adam(2e-2), l0.getIUpdater()); assertEquals(Activation.TANH.getActivationFunction(), l0.getActivationFn()); assertEquals(new 
WeightInitRelu(), l0.getWeightInitFn()); assertEquals(0.1, TestUtils.getL1(l0), 1e-6); - l1 = (BaseLayer) net2.getLayer(1).getLayerConfiguration(); + l1 = (BaseLayerConfiguration) net2.getLayer(1).getLayerConfiguration(); assertEquals(new Adam(2e-2), l1.getIUpdater()); assertEquals(Activation.HARDSIGMOID.getActivationFunction(), l1.getActivationFn()); assertEquals(new WeightInitRelu(), l1.getWeightInitFn()); @@ -586,17 +586,17 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .build()); notFrozen.init(); - assertArrayEquals(modelToFineTune.getLayer(0).params().shape(), modelNow.getLayer(0).params().shape()); + assertArrayEquals(modelToFineTune.getLayer(0).getParams().shape(), modelNow.getLayer(0).getParams().shape()); //subsampling has no params //assertArrayEquals(modelExpectedArch.getLayer(1).params().shape(), modelNow.getLayer(1).params().shape()); - assertArrayEquals(notFrozen.getLayer(0).params().shape(), modelNow.getLayer(2).params().shape()); - modelNow.getLayer(2).setParams(notFrozen.getLayer(0).params()); - assertArrayEquals(notFrozen.getLayer(1).params().shape(), modelNow.getLayer(3).params().shape()); - modelNow.getLayer(3).setParams(notFrozen.getLayer(1).params()); - assertArrayEquals(notFrozen.getLayer(2).params().shape(), modelNow.getLayer(4).params().shape()); - modelNow.getLayer(4).setParams(notFrozen.getLayer(2).params()); - assertArrayEquals(notFrozen.getLayer(3).params().shape(), modelNow.getLayer(5).params().shape()); - modelNow.getLayer(5).setParams(notFrozen.getLayer(3).params()); + assertArrayEquals(notFrozen.getLayer(0).getParams().shape(), modelNow.getLayer(2).getParams().shape()); + modelNow.getLayer(2).setParams(notFrozen.getLayer(0).getParams()); + assertArrayEquals(notFrozen.getLayer(1).getParams().shape(), modelNow.getLayer(3).getParams().shape()); + modelNow.getLayer(3).setParams(notFrozen.getLayer(1).getParams()); + assertArrayEquals(notFrozen.getLayer(2).getParams().shape(), modelNow.getLayer(4).getParams().shape()); + modelNow.getLayer(4).setParams(notFrozen.getLayer(2).getParams()); + assertArrayEquals(notFrozen.getLayer(3).getParams().shape(), modelNow.getLayer(5).getParams().shape()); + modelNow.getLayer(5).setParams(notFrozen.getLayer(3).getParams()); int i = 0; while (i < 3) { @@ -605,8 +605,8 @@ public class TransferLearningMLNTest extends BaseDL4JTest { i++; } - INDArray expectedParams = Nd4j.hstack(modelToFineTune.getLayer(0).params(), notFrozen.params()); - assertEquals(expectedParams, modelNow.params()); + INDArray expectedParams = Nd4j.hstack(modelToFineTune.getLayer(0).getParams(), notFrozen.getModelParams()); + assertEquals(expectedParams, modelNow.getModelParams()); } @Test diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java index cf73bb012..f92e34bf2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java @@ -99,7 +99,7 @@ public class TestUpdaters extends BaseDL4JTest { BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); - int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); + int updaterStateSize = (int) 
layer.getTypedLayerConfiguration().getIUpdater().stateSize(numParams); INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); @@ -144,7 +144,7 @@ public class TestUpdaters extends BaseDL4JTest { msdx.put(key, msdxTmp); count++; } - assertEquals(rho, ((AdaDelta)layer.layerConf().getIUpdater()).getRho(), 1e-4); + assertEquals(rho, ((AdaDelta)layer.getTypedLayerConfiguration().getIUpdater()).getRho(), 1e-4); } assertEquals(4, count); @@ -165,7 +165,7 @@ public class TestUpdaters extends BaseDL4JTest { BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); - int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); + int updaterStateSize = (int) layer.getTypedLayerConfiguration().getIUpdater().stateSize(numParams); INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); @@ -185,7 +185,7 @@ public class TestUpdaters extends BaseDL4JTest { assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); count++; } - assertEquals(lr, ((AdaGrad)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4); + assertEquals(lr, ((AdaGrad)layer.getTypedLayerConfiguration().getIUpdater()).getLearningRate(), 1e-4); assertEquals(2, count); } @@ -209,7 +209,7 @@ public class TestUpdaters extends BaseDL4JTest { BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); - int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); + int updaterStateSize = (int) layer.getTypedLayerConfiguration().getIUpdater().stateSize(numParams); INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); @@ -245,8 +245,8 @@ public class TestUpdaters extends BaseDL4JTest { count++; } - assertEquals(beta1, ((Adam)layer.layerConf().getIUpdater()).getBeta1(), 1e-4); - assertEquals(beta2, ((Adam)layer.layerConf().getIUpdater()).getBeta2(), 1e-4); + assertEquals(beta1, ((Adam)layer.getTypedLayerConfiguration().getIUpdater()).getBeta1(), 1e-4); + assertEquals(beta2, ((Adam)layer.getTypedLayerConfiguration().getIUpdater()).getBeta2(), 1e-4); assertEquals(2, count); } @@ -273,7 +273,7 @@ public class TestUpdaters extends BaseDL4JTest { layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); - int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); + int updaterStateSize = (int) layer.getTypedLayerConfiguration().getIUpdater().stateSize(numParams); INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); @@ -362,7 +362,7 @@ public class TestUpdaters extends BaseDL4JTest { BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); - int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); + int updaterStateSize = (int) layer.getTypedLayerConfiguration().getIUpdater().stateSize(numParams); INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); @@ -398,8 +398,8 @@ 
public class TestUpdaters extends BaseDL4JTest { count++; } - assertEquals(beta1, ((AdaMax)layer.layerConf().getIUpdater()).getBeta1(), 1e-4); - assertEquals(beta2, ((AdaMax)layer.layerConf().getIUpdater()).getBeta2(), 1e-4); + assertEquals(beta1, ((AdaMax)layer.getTypedLayerConfiguration().getIUpdater()).getBeta1(), 1e-4); + assertEquals(beta2, ((AdaMax)layer.getTypedLayerConfiguration().getIUpdater()).getBeta2(), 1e-4); assertEquals(2, count); } @@ -418,7 +418,7 @@ public class TestUpdaters extends BaseDL4JTest { BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); - int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); + int updaterStateSize = (int) layer.getTypedLayerConfiguration().getIUpdater().stateSize(numParams); INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); @@ -443,7 +443,7 @@ public class TestUpdaters extends BaseDL4JTest { count++; } - assertEquals(mu, ((Nesterovs)layer.layerConf().getIUpdater()).getMomentum(), 1e-4); + assertEquals(mu, ((Nesterovs)layer.getTypedLayerConfiguration().getIUpdater()).getMomentum(), 1e-4); assertEquals(2, count); } @@ -465,7 +465,7 @@ public class TestUpdaters extends BaseDL4JTest { BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); - int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); + int updaterStateSize = (int) layer.getTypedLayerConfiguration().getIUpdater().stateSize(numParams); INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); @@ -495,7 +495,7 @@ public class TestUpdaters extends BaseDL4JTest { assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); lastG.put(key, lastGTmp); } - assertEquals(rmsDecay, ((RmsProp)layer.layerConf().getIUpdater()).getRmsDecay(), 1e-4); + assertEquals(rmsDecay, ((RmsProp)layer.getTypedLayerConfiguration().getIUpdater()).getRmsDecay(), 1e-4); } @Test @@ -527,7 +527,7 @@ public class TestUpdaters extends BaseDL4JTest { gradExpected = val.mul(lr); assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); } - assertEquals(lr, ((Sgd)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4); + assertEquals(lr, ((Sgd)layer.getTypedLayerConfiguration().getIUpdater()).getLearningRate(), 1e-4); } @@ -769,7 +769,7 @@ public class TestUpdaters extends BaseDL4JTest { gradExpected = val.mul(lr); assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); } - assertEquals(lr, ((Sgd)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4); + assertEquals(lr, ((Sgd)layer.getTypedLayerConfiguration().getIUpdater()).getLearningRate(), 1e-4); //Test with pretrain == false @@ -797,7 +797,7 @@ public class TestUpdaters extends BaseDL4JTest { layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); updater = UpdaterCreator.getUpdater(layer); - assertEquals(lr, ((Sgd)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4); + assertEquals(lr, ((Sgd)layer.getTypedLayerConfiguration().getIUpdater()).getLearningRate(), 1e-4); } @Test @@ -858,11 +858,11 @@ public class TestUpdaters extends BaseDL4JTest { //Check 
first updater block: UpdaterBlock ub0 = blocks.get(0); assertEquals(3, ub0.getLayersAndVariablesInBlock().size()); - assertEquals("l0", ub0.getLayersAndVariablesInBlock().get(0).getLayer().getConfig().getLayerName()); + assertEquals("l0", ub0.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getLayerName()); assertEquals(DefaultParamInitializer.WEIGHT_KEY, ub0.getLayersAndVariablesInBlock().get(0).getParamName()); - assertEquals("l0", ub0.getLayersAndVariablesInBlock().get(1).getLayer().getConfig().getLayerName()); + assertEquals("l0", ub0.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getLayerName()); assertEquals(DefaultParamInitializer.BIAS_KEY, ub0.getLayersAndVariablesInBlock().get(1).getParamName()); - assertEquals("l1", ub0.getLayersAndVariablesInBlock().get(2).getLayer().getConfig().getLayerName()); + assertEquals("l1", ub0.getLayersAndVariablesInBlock().get(2).getLayer().getTrainingConfig().getLayerName()); assertEquals(DefaultParamInitializer.WEIGHT_KEY, ub0.getLayersAndVariablesInBlock().get(2).getParamName()); int nParams0 = 10 * 10 + 10 + 10 * 10; @@ -875,7 +875,7 @@ public class TestUpdaters extends BaseDL4JTest { //Check second updater block: UpdaterBlock ub1 = blocks.get(1); assertEquals(1, ub1.getLayersAndVariablesInBlock().size()); - assertEquals("l1", ub1.getLayersAndVariablesInBlock().get(0).getLayer().getConfig().getLayerName()); + assertEquals("l1", ub1.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getLayerName()); assertEquals(DefaultParamInitializer.BIAS_KEY, ub1.getLayersAndVariablesInBlock().get(0).getParamName()); int nParams1 = 10; @@ -888,9 +888,9 @@ public class TestUpdaters extends BaseDL4JTest { //Check third updater block: UpdaterBlock ub2 = blocks.get(2); assertEquals(2, ub2.getLayersAndVariablesInBlock().size()); - assertEquals("l2", ub2.getLayersAndVariablesInBlock().get(0).getLayer().getConfig().getLayerName()); + assertEquals("l2", ub2.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getLayerName()); assertEquals(DefaultParamInitializer.WEIGHT_KEY, ub2.getLayersAndVariablesInBlock().get(0).getParamName()); - assertEquals("l2", ub2.getLayersAndVariablesInBlock().get(1).getLayer().getConfig().getLayerName()); + assertEquals("l2", ub2.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getLayerName()); assertEquals(DefaultParamInitializer.BIAS_KEY, ub2.getLayersAndVariablesInBlock().get(1).getParamName()); int nParams2 = 10 * 10 + 10; @@ -903,9 +903,9 @@ public class TestUpdaters extends BaseDL4JTest { //Check fourth updater block: UpdaterBlock ub3 = blocks.get(3); assertEquals(2, ub3.getLayersAndVariablesInBlock().size()); - assertEquals("l3", ub3.getLayersAndVariablesInBlock().get(0).getLayer().getConfig().getLayerName()); + assertEquals("l3", ub3.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getLayerName()); assertEquals(DefaultParamInitializer.WEIGHT_KEY, ub3.getLayersAndVariablesInBlock().get(0).getParamName()); - assertEquals("l3", ub3.getLayersAndVariablesInBlock().get(1).getLayer().getConfig().getLayerName()); + assertEquals("l3", ub3.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getLayerName()); assertEquals(DefaultParamInitializer.BIAS_KEY, ub3.getLayersAndVariablesInBlock().get(1).getParamName()); int nParams3 = 10 * 10 + 10; @@ -918,9 +918,9 @@ public class TestUpdaters extends BaseDL4JTest { //Check fifth updater black UpdaterBlock ub4 = blocks.get(4); assertEquals(2, 
ub4.getLayersAndVariablesInBlock().size()); - assertEquals("l4", ub4.getLayersAndVariablesInBlock().get(0).getLayer().getConfig().getLayerName()); + assertEquals("l4", ub4.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getLayerName()); assertEquals(DefaultParamInitializer.WEIGHT_KEY, ub4.getLayersAndVariablesInBlock().get(0).getParamName()); - assertEquals("l4", ub4.getLayersAndVariablesInBlock().get(1).getLayer().getConfig().getLayerName()); + assertEquals("l4", ub4.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getLayerName()); assertEquals(DefaultParamInitializer.BIAS_KEY, ub4.getLayersAndVariablesInBlock().get(1).getParamName()); int nParams4 = 10 * 10 + 10; diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java index e5caf981f..e52b126f2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.updater.custom; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -61,18 +61,18 @@ public class TestCustomUpdater extends BaseDL4JTest { .build(); //First: Check updater config - assertTrue(((BaseLayer) conf1.getConf(0).getLayer()).getIUpdater() instanceof CustomIUpdater); - assertTrue(((BaseLayer) conf1.getConf(1).getLayer()).getIUpdater() instanceof CustomIUpdater); - assertTrue(((BaseLayer) conf2.getConf(0).getLayer()).getIUpdater() instanceof Sgd); - assertTrue(((BaseLayer) conf2.getConf(1).getLayer()).getIUpdater() instanceof Sgd); + assertTrue(((BaseLayerConfiguration) conf1.getConf(0).getLayer()).getIUpdater() instanceof CustomIUpdater); + assertTrue(((BaseLayerConfiguration) conf1.getConf(1).getLayer()).getIUpdater() instanceof CustomIUpdater); + assertTrue(((BaseLayerConfiguration) conf2.getConf(0).getLayer()).getIUpdater() instanceof Sgd); + assertTrue(((BaseLayerConfiguration) conf2.getConf(1).getLayer()).getIUpdater() instanceof Sgd); - CustomIUpdater u0_0 = (CustomIUpdater) ((BaseLayer) conf1.getConf(0).getLayer()).getIUpdater(); - CustomIUpdater u0_1 = (CustomIUpdater) ((BaseLayer) conf1.getConf(1).getLayer()).getIUpdater(); + CustomIUpdater u0_0 = (CustomIUpdater) ((BaseLayerConfiguration) conf1.getConf(0).getLayer()).getIUpdater(); + CustomIUpdater u0_1 = (CustomIUpdater) ((BaseLayerConfiguration) conf1.getConf(1).getLayer()).getIUpdater(); assertEquals(lr, u0_0.getLearningRate(), 1e-6); assertEquals(lr, u0_1.getLearningRate(), 1e-6); - Sgd u1_0 = (Sgd) ((BaseLayer) conf2.getConf(0).getLayer()).getIUpdater(); - Sgd u1_1 = (Sgd) ((BaseLayer) conf2.getConf(1).getLayer()).getIUpdater(); + Sgd u1_0 = (Sgd) ((BaseLayerConfiguration) conf2.getConf(0).getLayer()).getIUpdater(); + Sgd u1_1 = (Sgd) ((BaseLayerConfiguration) conf2.getConf(1).getLayer()).getIUpdater(); assertEquals(lr, u1_0.getLearningRate(), 1e-6); assertEquals(lr, u1_1.getLearningRate(), 1e-6); diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java index 692f0f44f..91101fccc 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java @@ -81,7 +81,7 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { layer.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); BackTrackLineSearch lineSearch = new BackTrackLineSearch(layer, layer.getOptimizer()); - double step = lineSearch.optimize(layer.params(), layer.gradient().gradient(), layer.gradient().gradient(), LayerWorkspaceMgr.noWorkspacesImmutable()); + double step = lineSearch.optimize(layer.getModelParams(), layer.gradient().gradient(), layer.gradient().gradient(), LayerWorkspaceMgr.noWorkspacesImmutable()); assertEquals(1.0, step, 1e-3); } @@ -97,11 +97,11 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { layer.setInput(irisData.getFeatures(), LayerWorkspaceMgr.noWorkspaces()); layer.setLabels(irisData.getLabels()); layer.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - score1 = layer.score(); + score1 = layer.getScore(); BackTrackLineSearch lineSearch = new BackTrackLineSearch(layer, new NegativeDefaultStepFunction(), layer.getOptimizer()); - double step = lineSearch.optimize(layer.params(), layer.gradient().gradient(), layer.gradient().gradient(), LayerWorkspaceMgr.noWorkspacesImmutable()); + double step = lineSearch.optimize(layer.getModelParams(), layer.gradient().gradient(), layer.gradient().gradient(), LayerWorkspaceMgr.noWorkspacesImmutable()); assertEquals(1.0, step, 1e-3); } @@ -118,18 +118,18 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { layer.setInput(irisData.getFeatures(), LayerWorkspaceMgr.noWorkspaces()); layer.setLabels(irisData.getLabels()); layer.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - score1 = layer.score(); + score1 = layer.getScore(); INDArray origGradient = layer.gradient().gradient().dup(); NegativeDefaultStepFunction sf = new NegativeDefaultStepFunction(); BackTrackLineSearch lineSearch = new BackTrackLineSearch(layer, sf, layer.getOptimizer()); - double step = lineSearch.optimize(layer.params(), layer.gradient().gradient(), layer.gradient().gradient(), LayerWorkspaceMgr.noWorkspacesImmutable()); - INDArray currParams = layer.params(); + double step = lineSearch.optimize(layer.getModelParams(), layer.gradient().gradient(), layer.gradient().gradient(), LayerWorkspaceMgr.noWorkspacesImmutable()); + INDArray currParams = layer.getModelParams(); sf.step(currParams, origGradient, step); layer.setParamsTable(currParams); layer.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - score2 = layer.score(); + score2 = layer.getScore(); assertTrue(score1 > score2, "score1=" + score1 + ", score2=" + score2); @@ -146,19 +146,19 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { layer.setInput(irisData.getFeatures(), LayerWorkspaceMgr.noWorkspaces()); layer.setLabels(irisData.getLabels()); layer.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - score1 = layer.score(); + score1 = layer.getScore(); INDArray origGradient = layer.gradient().gradient().dup(); DefaultStepFunction sf = new DefaultStepFunction(); BackTrackLineSearch lineSearch = new BackTrackLineSearch(layer, sf, layer.getOptimizer()); - double 
step = lineSearch.optimize(layer.params().dup(), layer.gradient().gradient().dup(), + double step = lineSearch.optimize(layer.getModelParams().dup(), layer.gradient().gradient().dup(), layer.gradient().gradient().dup(), LayerWorkspaceMgr.noWorkspacesImmutable()); - INDArray currParams = layer.params(); + INDArray currParams = layer.getModelParams(); sf.step(currParams, origGradient, step); layer.setParamsTable(currParams); layer.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - score2 = layer.score(); + score2 = layer.getScore(); assertTrue(score1 < score2, "score1 = " + score1 + ", score2 = " + score2); } @@ -190,12 +190,12 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { MultiLayerNetwork network = new MultiLayerNetwork(getIrisMultiLayerConfig(Activation.SIGMOID, optimizer)); network.init(); TrainingListener listener = new ScoreIterationListener(10); - network.setListeners(Collections.singletonList(listener)); + network.addTrainingListeners(Collections.singletonList(listener)); double oldScore = network.score(data); for( int i=0; i<100; i++ ) { network.fit(data.getFeatures(), data.getLabels()); } - double score = network.score(); + double score = network.getScore(); assertTrue(score < oldScore); } @@ -208,13 +208,13 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { MultiLayerNetwork network = new MultiLayerNetwork(getIrisMultiLayerConfig(Activation.RELU, optimizer)); network.init(); TrainingListener listener = new ScoreIterationListener(10); - network.setListeners(Collections.singletonList(listener)); + network.addTrainingListeners(Collections.singletonList(listener)); double firstScore = network.score(data); for( int i=0; i<5; i++ ) { network.fit(data.getFeatures(), data.getLabels()); } - double score = network.score(); + double score = network.getScore(); assertTrue(score < firstScore); } @@ -227,13 +227,13 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { MultiLayerNetwork network = new MultiLayerNetwork(getIrisMultiLayerConfig(Activation.RELU, optimizer)); network.init(); TrainingListener listener = new ScoreIterationListener(10); - network.setListeners(Collections.singletonList(listener)); + network.addTrainingListeners(Collections.singletonList(listener)); double oldScore = network.score(data); for( int i=0; i<5; i++ ) { network.fit(data.getFeatures(), data.getLabels()); } - double score = network.score(); + double score = network.getScore(); assertTrue(score < oldScore); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java index 69afb6330..7883c899f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java @@ -28,6 +28,7 @@ import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.*; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -211,38 +212,38 @@ public class TestOptimizers extends BaseDL4JTest { System.out.println("---------\n Alg= " + oa + ", nIter= " + numLineSearchIter + ", nDimensions= " + 
nDimensions); - NeuralNetConfiguration conf = NeuralNetConfiguration.builder().maxNumLineSearchIterations(numLineSearchIter) + LayerConfiguration conf = NeuralNetConfiguration.builder().maxNumLineSearchIterations(numLineSearchIter) .updater(new Sgd(1e-2)) - .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build(); - conf.addNetWideVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here + .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build().getFlattenedLayerConfigurations().get(0); + conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here Random rng = new DefaultRandom(12345L); org.nd4j.linalg.api.rng.distribution.Distribution dist = new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10); IModel m = new SphereFunctionModel(nDimensions, dist, conf); m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - double scoreBefore = m.score(); + double scoreBefore = m.getScore(); assertTrue(!Double.isNaN(scoreBefore) && !Double.isInfinite(scoreBefore)); if (PRINT_OPT_RESULTS) { System.out.println("Before:"); System.out.println(scoreBefore); - System.out.println(m.params()); + System.out.println(m.getModelParams()); } - ConvexOptimizer opt = getOptimizer(oa, conf, m); + ConvexOptimizer opt = getOptimizer(oa, conf.getNetConfiguration(), m); opt.setupSearchState(m.gradientAndScore()); for( int i=0; i<100; i++ ) { opt.optimize(LayerWorkspaceMgr.noWorkspaces()); } m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - double scoreAfter = m.score(); + double scoreAfter = m.getScore(); assertTrue(!Double.isNaN(scoreAfter) && !Double.isInfinite(scoreAfter)); if (PRINT_OPT_RESULTS) { System.out.println("After:"); System.out.println(scoreAfter); - System.out.println(m.params()); + System.out.println(m.getModelParams()); } //Expected behaviour after optimization: @@ -279,17 +280,17 @@ public class TestOptimizers extends BaseDL4JTest { .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build(); conf.addNetWideVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here - IModel m = new SphereFunctionModel(100, dist, conf); + IModel m = new SphereFunctionModel(100, dist, conf.getFlattenedLayerConfigurations().get(0)); if (i == 0) { m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - scores[0] = m.score(); //Before optimization + scores[0] = m.getScore(); //Before optimization } else { ConvexOptimizer opt = getOptimizer(oa, conf, m); for( int j=0; j<100; j++ ) { opt.optimize(LayerWorkspaceMgr.noWorkspaces()); } m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - scores[i] = m.score(); + scores[i] = m.getScore(); assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i])); } } @@ -316,7 +317,7 @@ public class TestOptimizers extends BaseDL4JTest { private static final long serialVersionUID = -6963606137417355405L; private SphereFunctionModel(int nParams, org.nd4j.linalg.api.rng.distribution.Distribution distribution, - NeuralNetConfiguration conf) { + LayerConfiguration conf) { super(distribution.sample(new int[] {1, nParams}), conf); } @@ -437,7 +438,7 @@ public class TestOptimizers extends BaseDL4JTest { } @Override - public void setListeners(TrainingListener... listeners) { + public void addTrainingListeners(TrainingListener... 
listeners) { } @@ -499,17 +500,17 @@ public class TestOptimizers extends BaseDL4JTest { .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build(); conf.addNetWideVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here - IModel m = new RastriginFunctionModel(10, conf); + IModel m = new RastriginFunctionModel(10, conf.getFlattenedLayerConfigurations().get(0)); int nParams = (int)m.numParams(); if (i == 0) { m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - scores[0] = m.score(); //Before optimization + scores[0] = m.getScore(); //Before optimization } else { ConvexOptimizer opt = getOptimizer(oa, conf, m); opt.getUpdater().setStateViewArray((Layer) m, Nd4j.create(new int[] {1, nParams}, 'c'), true); opt.optimize(LayerWorkspaceMgr.noWorkspaces()); m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - scores[i] = m.score(); + scores[i] = m.getScore(); assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i])); } } @@ -540,7 +541,7 @@ public class TestOptimizers extends BaseDL4JTest { private static class RastriginFunctionModel extends SimpleOptimizableModel { private static final long serialVersionUID = -1772954508787487941L; - private RastriginFunctionModel(int nDimensions, NeuralNetConfiguration conf) { + private RastriginFunctionModel(int nDimensions, LayerConfiguration conf) { super(initParams(nDimensions), conf); } @@ -710,7 +711,7 @@ public class TestOptimizers extends BaseDL4JTest { } @Override - public void setListeners(TrainingListener... listeners) { + public void addTrainingListeners(TrainingListener... listeners) { } @@ -768,15 +769,15 @@ public class TestOptimizers extends BaseDL4JTest { .build(); conf.addNetWideVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here - IModel m = new RosenbrockFunctionModel(100, conf); + IModel m = new RosenbrockFunctionModel(100, conf.getFlattenedLayerConfigurations().get(0)); if (i == 0) { m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - scores[0] = m.score(); //Before optimization + scores[0] = m.getScore(); //Before optimization } else { ConvexOptimizer opt = getOptimizer(oa, conf, m); opt.optimize(LayerWorkspaceMgr.noWorkspaces()); m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - scores[i] = m.score(); + scores[i] = m.getScore(); assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]), "NaN or infinite score: " + scores[i]); } } @@ -810,7 +811,7 @@ public class TestOptimizers extends BaseDL4JTest { private static class RosenbrockFunctionModel extends SimpleOptimizableModel { private static final long serialVersionUID = -5129494342531033706L; - private RosenbrockFunctionModel(int nDimensions, NeuralNetConfiguration conf) { + private RosenbrockFunctionModel(int nDimensions, LayerConfiguration conf) { super(initParams(nDimensions), conf); } @@ -995,7 +996,7 @@ public class TestOptimizers extends BaseDL4JTest { } @Override - public void setListeners(TrainingListener... listeners) { + public void addTrainingListeners(TrainingListener... 
listeners) { } @@ -1029,13 +1030,31 @@ public class TestOptimizers extends BaseDL4JTest { private static final long serialVersionUID = 4409380971404019303L; protected INDArray parameters; protected INDArray gradientView; - protected final NeuralNetConfiguration conf; + protected final LayerConfiguration conf; protected Gradient gradient; protected double score; + /** + * @return 1d parameter vector + */ + @Override + public INDArray getParams() { + throw new RuntimeException("Not implemented"); + } + + /** + * Get a reference to the network this layer is part of. + * + * @return + */ + @Override + public IModel getNet() { + throw new RuntimeException("Not implemented"); + } + /**@param parameterInit Initial parameters. Also determines dimensionality of problem. Should be row vector. */ - private SimpleOptimizableModel(INDArray parameterInit, NeuralNetConfiguration conf) { + private SimpleOptimizableModel(INDArray parameterInit, LayerConfiguration conf) { this.parameters = parameterInit.dup(); this.gradientView = Nd4j.create(parameterInit.shape()); this.conf = conf; @@ -1048,17 +1067,12 @@ public class TestOptimizers extends BaseDL4JTest { */ @Override public LayerConfiguration getLayerConfiguration() { - return this.conf.getFirstLayer(); + return this.conf; } @Override - public void addListeners(TrainingListener... listener) { - // no-op - } - - @Override - public TrainingConfig getConfig() { - return conf.getFirstLayer(); + public ITraininableLayerConfiguration getTrainingConfig() { + return (BaseLayerConfiguration) conf; } /** @@ -1092,7 +1106,7 @@ public class TestOptimizers extends BaseDL4JTest { } @Override - public void setListeners(TrainingListener... listeners) { + public void addTrainingListeners(TrainingListener... listeners) { } @@ -1112,7 +1126,7 @@ public class TestOptimizers extends BaseDL4JTest { } @Override - public double score() { + public double getScore() { return score; } @@ -1132,7 +1146,7 @@ public class TestOptimizers extends BaseDL4JTest { } @Override - public INDArray params() { + public INDArray getModelParams() { return parameters; } @@ -1154,7 +1168,7 @@ public class TestOptimizers extends BaseDL4JTest { @Override public Pair gradientAndScore() { computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); - return new Pair<>(gradient(), score()); + return new Pair<>(gradient(), getScore()); } @Override @@ -1164,7 +1178,7 @@ public class TestOptimizers extends BaseDL4JTest { @Override public NeuralNetConfiguration getNetConfiguration() { - return conf; + return conf.getNetConfiguration(); } @Override @@ -1225,12 +1239,12 @@ public class TestOptimizers extends BaseDL4JTest { } @Override - public Collection getListeners() { + public Collection getTrainingListeners() { return null; } @Override - public void setListeners(Collection listeners) { + public void addTrainingListeners(Collection listeners) { throw new UnsupportedOperationException(); } @@ -1310,4 +1324,6 @@ public class TestOptimizers extends BaseDL4JTest { public void close(){ } } + + } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java index 6f422fda1..2e34fcd46 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java @@ -76,7 +76,7 @@ public class 
TestCheckpointListener extends BaseDL4JTest { .keepAll() .saveEveryNEpochs(2) .build(); - net.setListeners(l); + net.addTrainingListeners(l); for(int i=0; i<10; i++ ){ net.fit(iter); @@ -125,7 +125,7 @@ public class TestCheckpointListener extends BaseDL4JTest { .keepLast(3) .saveEveryNIterations(5) .build(); - net.setListeners(l); + net.addTrainingListeners(l); for(int i=0; i<20; i++ ){ //40 iterations total net.fit(iter); @@ -167,7 +167,7 @@ public class TestCheckpointListener extends BaseDL4JTest { MultiLayerNetwork netStatic2 = CheckpointListener.loadLastCheckpointMLN(f); assertEquals(35, netStatic2.getIterationCount()); - assertEquals(netStatic.params(), netStatic2.params()); + assertEquals(netStatic.getModelParams(), netStatic2.getModelParams()); } @Test @@ -182,7 +182,7 @@ public class TestCheckpointListener extends BaseDL4JTest { .keepLast(3) .saveEvery(4900, TimeUnit.MILLISECONDS) .build(); - net.setListeners(l); + net.addTrainingListeners(l); for(int i=0; i<3; i++ ){ //10 iterations total net.fit(iter); @@ -226,7 +226,7 @@ public class TestCheckpointListener extends BaseDL4JTest { .keepLastAndEvery(3, 3) .saveEveryNEpochs(2) .build(); - net.setListeners(l); + net.addTrainingListeners(l); for(int i=0; i<20; i++ ){ //40 iterations total net.fit(iter); @@ -272,7 +272,7 @@ public class TestCheckpointListener extends BaseDL4JTest { .keepAll() .saveEveryNEpochs(1) .build(); - net.setListeners(l); + net.addTrainingListeners(l); for(int i=0; i<3; i++ ){ net.fit(iter); @@ -294,7 +294,7 @@ public class TestCheckpointListener extends BaseDL4JTest { .saveEveryNEpochs(1) .deleteExisting(true) .build(); - net.setListeners(l); + net.addTrainingListeners(l); net.fit(iter); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java index a1933c247..fb500772d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java @@ -58,7 +58,7 @@ public class TestFailureListener extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - net.setListeners(new FailureTestingListener( + net.addTrainingListeners(new FailureTestingListener( // FailureTestingListener.FailureMode.OOM, FailureTestingListener.FailureMode.SYSTEM_EXIT_1, new FailureTestingListener.IterationEpochTrigger(false, 10))); @@ -84,7 +84,7 @@ public class TestFailureListener extends BaseDL4JTest { assertNotNull(username); assertFalse(username.isEmpty()); - net.setListeners(new FailureTestingListener( + net.addTrainingListeners(new FailureTestingListener( FailureTestingListener.FailureMode.SYSTEM_EXIT_1, new FailureTestingListener.Or( new FailureTestingListener.IterationEpochTrigger(false, 10000), @@ -112,7 +112,7 @@ public class TestFailureListener extends BaseDL4JTest { assertNotNull(hostname); assertFalse(hostname.isEmpty()); - net.setListeners(new FailureTestingListener( + net.addTrainingListeners(new FailureTestingListener( FailureTestingListener.FailureMode.ILLEGAL_STATE, new FailureTestingListener.And( new FailureTestingListener.HostNameTrigger(hostname), diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java index b335d43a6..f3d4f5dee 100644 
--- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java @@ -77,17 +77,17 @@ public class TestListeners extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - net.setListeners(new ScoreIterationListener(), new TestRoutingListener()); + net.addTrainingListeners(new ScoreIterationListener(), new TestRoutingListener()); for (Layer l : net.getLayers()) { - Collection layerListeners = l.getListeners(); + Collection layerListeners = l.getTrainingListeners(); assertEquals(2, layerListeners.size(), l.getClass().toString()); TrainingListener[] lArr = layerListeners.toArray(new TrainingListener[2]); assertTrue(lArr[0] instanceof ScoreIterationListener); assertTrue(lArr[1] instanceof TestRoutingListener); } - Collection netListeners = net.getListeners(); + Collection netListeners = net.getTrainingListeners(); assertEquals(2, netListeners.size()); TrainingListener[] lArr = netListeners.toArray(new TrainingListener[2]); assertTrue(lArr[0] instanceof ScoreIterationListener); @@ -101,17 +101,17 @@ public class TestListeners extends BaseDL4JTest { ComputationGraph cg = new ComputationGraph(gConf); cg.init(); - cg.setListeners(new ScoreIterationListener(), new TestRoutingListener()); + cg.addTrainingListeners(new ScoreIterationListener(), new TestRoutingListener()); for (Layer l : cg.getLayers()) { - Collection layerListeners = l.getListeners(); + Collection layerListeners = l.getTrainingListeners(); assertEquals(2, layerListeners.size()); lArr = layerListeners.toArray(new TrainingListener[2]); assertTrue(lArr[0] instanceof ScoreIterationListener); assertTrue(lArr[1] instanceof TestRoutingListener); } - netListeners = cg.getListeners(); + netListeners = cg.getTrainingListeners(); assertEquals(2, netListeners.size()); lArr = netListeners.toArray(new TrainingListener[2]); assertTrue(lArr[0] instanceof ScoreIterationListener); @@ -180,7 +180,7 @@ public class TestListeners extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - net.setListeners(listeners); + net.addTrainingListeners(listeners); net.fit(iter); @@ -199,7 +199,7 @@ public class TestListeners extends BaseDL4JTest { listeners2.add(il2); } - net.setListeners(listeners2); + net.addTrainingListeners(listeners2); net.fit(iter); } @@ -216,7 +216,7 @@ public class TestListeners extends BaseDL4JTest { net.init(); TestListener tl = new TestListener(); - net.setListeners(tl); + net.addTrainingListeners(tl); DataSetIterator irisIter = new IrisDataSetIterator(50, 150); @@ -260,7 +260,7 @@ public class TestListeners extends BaseDL4JTest { tl = new TestListener(); ComputationGraph cg = net.toComputationGraph(); - cg.setListeners(tl); + cg.addTrainingListeners(tl); cg.fit(irisIter, 2); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java index 114d90887..2c214eeff 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java @@ -94,7 +94,7 @@ public class RandomTests extends BaseDL4JTest { // at the end of day, model params has to for (int i = 0; i < models.size(); i++) { - assertEquals(models.get(0).params(), models.get(i).params()); + assertEquals(models.get(0).getModelParams(), 
models.get(i).getModelParams()); } } @@ -119,7 +119,7 @@ public class RandomTests extends BaseDL4JTest { MultiLayerNetwork net2 = new MultiLayerNetwork(conf); net2.init(); - assertEquals(net1.params(), net2.params()); + assertEquals(net1.getModelParams(), net2.getModelParams()); NeuralNetConfiguration fromJson = NeuralNetConfiguration.fromJson(json); @@ -127,6 +127,6 @@ public class RandomTests extends BaseDL4JTest { MultiLayerNetwork net3 = new MultiLayerNetwork(fromJson); net3.init(); - assertEquals(net1.params(), net3.params()); + assertEquals(net1.getModelParams(), net3.getModelParams()); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java index c52f4943f..6b2d882e3 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java @@ -63,7 +63,7 @@ public class TestSystemInfoPrintListener extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - net.setListeners(systemInfoFilePrintListener); + net.addTrainingListeners(systemInfoFilePrintListener); DataSetIterator iter = new IrisDataSetIterator(10, 150); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java index df6f1e0cb..773ccbae8 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java @@ -87,7 +87,7 @@ public class RegressionTest050 extends BaseDL4JTest { assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6); int numParams = (int)net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new Nesterovs().stateSize(net.numParams()); assertEquals(Nd4j.linspace(1, updaterSize, updaterSize, Nd4j.dataType()).reshape(1,numParams), net.getUpdater().getStateViewArray()); } @@ -126,7 +126,7 @@ public class RegressionTest050 extends BaseDL4JTest { assertEquals(new WeightDecay(0.2, false), TestUtils.getWeightDecayReg(l1)); int numParams = (int)net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new RmsProp().stateSize(numParams); assertEquals(Nd4j.linspace(1, updaterSize, updaterSize, Nd4j.dataType()).reshape(1,numParams), net.getUpdater().getStateViewArray()); } @@ -170,7 +170,7 @@ public class RegressionTest050 extends BaseDL4JTest { assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); int numParams = (int)net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new RmsProp().stateSize(numParams); 
assertEquals(Nd4j.linspace(1, updaterSize, updaterSize, Nd4j.dataType()).reshape(1,numParams), net.getUpdater().getStateViewArray()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java index d6c88b4d3..c75c11d11 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java @@ -89,7 +89,7 @@ public class RegressionTest060 extends BaseDL4JTest { assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6); int numParams = (int)net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new Nesterovs().stateSize(numParams); assertEquals(Nd4j.linspace(1, updaterSize, updaterSize, Nd4j.dataType()).reshape(1,numParams), net.getUpdater().getStateViewArray()); } @@ -132,7 +132,7 @@ public class RegressionTest060 extends BaseDL4JTest { assertEquals(1.5, l1.getGradientNormalizationThreshold(), 1e-5); int numParams = (int)net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new RmsProp().stateSize(numParams); assertEquals(Nd4j.linspace(1, updaterSize, updaterSize, Nd4j.dataType()).reshape(1,numParams), net.getUpdater().getStateViewArray()); } @@ -178,7 +178,7 @@ public class RegressionTest060 extends BaseDL4JTest { assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); int numParams = (int)net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new RmsProp().stateSize(numParams); assertEquals(Nd4j.linspace(1, updaterSize, updaterSize, Nd4j.dataType()).reshape(1,numParams), net.getUpdater().getStateViewArray()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java index bf14dba46..63ea30e49 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java @@ -90,7 +90,7 @@ public class RegressionTest071 extends BaseDL4JTest { assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6); long numParams = (int)net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new Nesterovs().stateSize(numParams); assertEquals(Nd4j.linspace(1, updaterSize, updaterSize).reshape(1,numParams), net.getUpdater().getStateViewArray()); } @@ -133,7 +133,7 @@ public class RegressionTest071 extends BaseDL4JTest { assertEquals(1.5, l1.getGradientNormalizationThreshold(), 1e-5); long numParams = 
net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new RmsProp().stateSize(numParams); assertEquals(Nd4j.linspace(1, updaterSize, updaterSize).reshape(1,numParams), net.getUpdater().getStateViewArray()); } @@ -179,7 +179,7 @@ public class RegressionTest071 extends BaseDL4JTest { assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); long numParams = net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new RmsProp().stateSize(numParams); assertEquals(Nd4j.linspace(1, updaterSize, updaterSize).reshape(1,numParams), net.getUpdater().getStateViewArray()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java index 4cc26f05a..010ac9733 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java @@ -94,7 +94,7 @@ public class RegressionTest080 extends BaseDL4JTest { assertEquals(0.15, n.getLearningRate(), 1e-6); int numParams = (int)net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new Nesterovs().stateSize(numParams); assertEquals(Nd4j.linspace(1, updaterSize, updaterSize, Nd4j.dataType()).reshape(1,numParams), net.getUpdater().getStateViewArray()); } @@ -143,7 +143,7 @@ public class RegressionTest080 extends BaseDL4JTest { assertEquals(1.5, l1.getGradientNormalizationThreshold(), 1e-5); int numParams = (int)net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new RmsProp().stateSize(numParams); assertEquals(Nd4j.linspace(1, updaterSize, updaterSize, Nd4j.dataType()).reshape(1,numParams), net.getUpdater().getStateViewArray()); } @@ -194,7 +194,7 @@ public class RegressionTest080 extends BaseDL4JTest { assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); int numParams = (int)net.numParams(); - assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.params()); + assertEquals(Nd4j.linspace(1, numParams, numParams, Nd4j.dataType()).reshape(1,numParams), net.getModelParams()); int updaterSize = (int) new RmsProp().stateSize(numParams); assertEquals(Nd4j.linspace(1, updaterSize, updaterSize, Nd4j.dataType()).reshape(1,numParams), net.getUpdater().getStateViewArray()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java index 6d73c1074..829fc8c2b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java @@ -97,7 +97,7 @@ public class RegressionTest100b3 extends BaseDL4JTest { assertEquals(dt, in.dataType()); assertEquals(dt, outExp.dataType()); - assertEquals(dt, net.params().dataType()); + assertEquals(dt, net.getModelParams().dataType()); assertEquals(dt, net.getFlattenedGradients().dataType()); assertEquals(dt, net.getUpdater().getStateViewArray().dataType()); @@ -109,7 +109,7 @@ public class RegressionTest100b3 extends BaseDL4JTest { List activations = net.feedForward(in); assertEquals(dt, net.getNetConfiguration().getDataType()); - assertEquals(dt, net.params().dataType()); + assertEquals(dt, net.getModelParams().dataType()); assertEquals( outExp, outAct, dtype); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java index bd2f231d2..b1247b3c1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java @@ -116,7 +116,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { assertEquals(dtype, in.dataType()); assertEquals(dtype, outExp.dataType()); - assertEquals(dtype, net.params().dataType()); + assertEquals(dtype, net.getModelParams().dataType()); assertEquals(dtype, net.getFlattenedGradients().dataType()); assertEquals(dtype, net.getUpdater().getStateViewArray().dataType()); @@ -126,7 +126,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { assertEquals(dtype, outAct.dataType()); assertEquals(dtype, net.getNetConfiguration().getDataType()); - assertEquals(dtype, net.params().dataType()); + assertEquals(dtype, net.getModelParams().dataType()); boolean eq = outExp.equalsWithEps(outAct, 0.01); assertTrue(eq, "Test for dtype: " + dtypeName + "\n" + outExp + " vs " + outAct); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java index bf13cff1b..f00b9c437 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java @@ -98,7 +98,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { assertEquals(dtype, in.dataType()); assertEquals(dtype, outExp.dataType()); - assertEquals(dtype, net.params().dataType()); + assertEquals(dtype, net.getModelParams().dataType()); assertEquals(dtype, net.getFlattenedGradients().dataType()); assertEquals(dtype, net.getUpdater().getStateViewArray().dataType()); @@ -108,7 +108,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { assertEquals(dtype, outAct.dataType()); assertEquals(dtype, net.getNetConfiguration().getDataType()); - assertEquals(dtype, net.params().dataType()); + assertEquals(dtype, net.getModelParams().dataType()); boolean eq = outExp.equalsWithEps(outAct, 0.01); assertTrue( eq, "Test for dtype: " + dtypeName + " - " + outExp + " vs " + outAct); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java index 72b55f9e6..b20ad6f00 100644 
--- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java
+++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java
@@ -76,7 +76,7 @@ public class CustomLayer extends FeedForwardLayer {
         //For the most part, it's the same for each type of layer
         CustomLayerImpl myCustomLayer = new CustomLayerImpl(lconf, networkDataType);
-        myCustomLayer.setListeners(iterationListeners); //Set the iteration listeners, if any
+        myCustomLayer.addTrainingListeners(iterationListeners); //Set the iteration listeners, if any
         myCustomLayer.setIndex(layerIndex); //Integer index of the layer
         //Parameter view array: In Deeplearning4j, the network parameters for the entire network (all layers) are
diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayerImpl.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayerImpl.java
index d233a5da3..14e13634b 100644
--- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayerImpl.java
+++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayerImpl.java
@@ -20,7 +20,6 @@
 package org.deeplearning4j.regressiontest.customlayer100a;
-import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
 import org.deeplearning4j.nn.gradient.DefaultGradient;
 import org.deeplearning4j.nn.gradient.Gradient;
@@ -56,7 +55,7 @@ public class CustomLayerImpl extends BaseLayer { //Generic paramete
         INDArray firstHalf = output.get(NDArrayIndex.all(), NDArrayIndex.interval(0, columns / 2));
         INDArray secondHalf = output.get(NDArrayIndex.all(), NDArrayIndex.interval(columns / 2, columns));
-        IActivation activation1 = layerConf().getActivationFn();
+        IActivation activation1 = getTypedLayerConfiguration().getActivationFn();
         IActivation activation2 = ((CustomLayer) getLayerConfiguration()).getSecondActivationFunction();
         //IActivation function instances modify the activation functions in-place
@@ -75,7 +74,7 @@ public class CustomLayerImpl extends BaseLayer { //Generic paramete
     @Override
     public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
         /*
-        The baockprop gradient method here is very similar to the BaseLayer backprop gradient implementation
+        The backprop gradient method here is very similar to the BaseLayerConfiguration backprop gradient implementation
         The only major difference is the two activation functions we have added in this example.
         Note that epsilon is dL/da - i.e., the derivative of the loss function with respect to the activations.
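// ------------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): a minimal helper showing the renamed accessors that
// the surrounding hunks migrate to. Assumptions: the class and method names below are hypothetical,
// the MultiLayerNetwork passed in has already been configured, init()'d, and given its input and
// labels so that computeGradientAndScore() can run, and these accessors are available on
// MultiLayerNetwork as they are used elsewhere in this change set. Old names are noted inline.
// ------------------------------------------------------------------------------------------------
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.nd4j.linalg.api.ndarray.INDArray;

class RenamedAccessorsSketch {

    static double inspect(MultiLayerNetwork net) {
        // was: net.setListeners(...) / net.addListeners(...)
        net.addTrainingListeners(new ScoreIterationListener(1));

        // was: net.params() - a flattened view of all model parameters (shown only for the accessor)
        INDArray flatParams = net.getModelParams();

        // getScore() does no calculation of its own; it returns the value cached by the preceding
        // computeGradientAndScore(...) call (was: net.score())
        net.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
        return net.getScore();
    }
}
// usage (hypothetical): double score = RenamedAccessorsSketch.inspect(net);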
@@ -105,14 +104,14 @@ public class CustomLayerImpl extends BaseLayer { //Generic paramete INDArray epsilonFirstHalf = epsilon.get(NDArrayIndex.all(), NDArrayIndex.interval(0, columns / 2)); INDArray epsilonSecondHalf = epsilon.get(NDArrayIndex.all(), NDArrayIndex.interval(columns / 2, columns)); - IActivation activation1 = layerConf().getActivationFn(); + IActivation activation1 = getTypedLayerConfiguration().getActivationFn(); IActivation activation2 = ((CustomLayer) getLayerConfiguration()).getSecondActivationFunction(); //IActivation backprop method modifies the 'firstHalf' and 'secondHalf' arrays in-place, to contain dL/dz activation1.backprop(firstHalf, epsilonFirstHalf); activation2.backprop(secondHalf, epsilonSecondHalf); - //The remaining code for this method: just copy & pasted from BaseLayer.backpropGradient + //The remaining code for this method: just copy & pasted from BaseLayerConfiguration.backpropGradient // INDArray delta = epsilon.muli(activationDerivative); if (maskArray != null) { activationDerivative.muliColumnVector(maskArray); @@ -128,7 +127,7 @@ public class CustomLayerImpl extends BaseLayer { //Generic paramete ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad); ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad); - INDArray epsilonNext = paramsTable.get(DefaultParamInitializer.WEIGHT_KEY).mmul(activationDerivative.transpose()).transpose(); + INDArray epsilonNext = getParamTable().get(DefaultParamInitializer.WEIGHT_KEY).mmul(activationDerivative.transpose()).transpose(); return new Pair<>(ret, epsilonNext); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java index 03b8192f4..b4edb0ba8 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java @@ -190,7 +190,7 @@ public class CompareTrainingImplementations extends BaseDL4JTest { //Check score - double scoreDl4j = net.score(); + double scoreDl4j = net.getScore(); double scoreSd = map.get(lossMse.name()).getDouble(0) + sd.calcRegularizationScore(); assertEquals(scoreDl4j, scoreSd, 1e-6, testName); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java index 49a9c7fa1..e0eeef88d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java @@ -104,7 +104,7 @@ public class CrashReportingUtilTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - net.addListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); //Test net that hasn't been trained yet Exception e = new Exception(); @@ -161,7 +161,7 @@ public class CrashReportingUtilTest extends BaseDL4JTest { CrashReportingUtil.crashDumpOutputDirectory(dir); ComputationGraph cg = net.toComputationGraph(); - cg.setListeners(new ScoreIterationListener(1)); + cg.addTrainingListeners(new ScoreIterationListener(1)); //Test net that hasn't been trained yet CrashReportingUtil.writeMemoryCrashDump(cg, e); diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java index 4415c5455..e941c75ee 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java @@ -156,7 +156,7 @@ public class ModelGuesserTest extends BaseDL4JTest { MultiLayerNetwork network = (MultiLayerNetwork) ModelGuesser.loadModelGuess(tempFile.getAbsolutePath()); assertEquals(network.getNetConfiguration().toJson(), net.getNetConfiguration().toJson()); - assertEquals(net.params(), network.params()); + assertEquals(net.getModelParams(), network.getModelParams()); assertEquals(net.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); } @@ -173,7 +173,7 @@ public class ModelGuesserTest extends BaseDL4JTest { MultiLayerNetwork network = (MultiLayerNetwork) ModelGuesser.loadModelGuess(inputStream); Assertions.assertNotNull(network); assertEquals(network.getNetConfiguration().toJson(), net.getNetConfiguration().toJson()); - assertEquals(net.params(), network.params()); + assertEquals(net.getModelParams(), network.getModelParams()); assertEquals(net.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java index 5124e15ac..495b403d5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java @@ -81,7 +81,7 @@ public class ModelSerializerTest extends BaseDL4JTest { MultiLayerNetwork network = ModelSerializer.restoreMultiLayerNetwork(tempFile); assertEquals(network.getNetConfiguration().toJson(), net.getNetConfiguration().toJson()); - assertEquals(net.params(), network.params()); + assertEquals(net.getModelParams(), network.getModelParams()); assertEquals(net.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); } @@ -125,7 +125,7 @@ public class ModelSerializerTest extends BaseDL4JTest { MultiLayerNetwork network = ModelSerializer.restoreMultiLayerNetwork(fis); assertEquals(network.getNetConfiguration().toJson(), net.getNetConfiguration().toJson()); - assertEquals(net.params(), network.params()); + assertEquals(net.getModelParams(), network.getModelParams()); assertEquals(net.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); } @@ -151,7 +151,7 @@ public class ModelSerializerTest extends BaseDL4JTest { ComputationGraph network = ModelSerializer.restoreComputationGraph(tempFile); assertEquals(network.getComputationGraphConfiguration().toJson(), cg.getComputationGraphConfiguration().toJson()); - assertEquals(cg.params(), network.params()); + assertEquals(cg.getModelParams(), network.getModelParams()); assertEquals(cg.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); } @@ -177,7 +177,7 @@ public class ModelSerializerTest extends BaseDL4JTest { ComputationGraph network = ModelSerializer.restoreComputationGraph(fis); assertEquals(network.getComputationGraphConfiguration().toJson(), cg.getComputationGraphConfiguration().toJson()); - assertEquals(cg.params(), network.params()); + assertEquals(cg.getModelParams(), network.getModelParams()); 
assertEquals(cg.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); } @@ -346,7 +346,7 @@ public class ModelSerializerTest extends BaseDL4JTest { //Also test reading both model and normalizer from stream (correctly) Pair pair = ModelSerializer.restoreMultiLayerNetworkAndNormalizer(new FileInputStream(tempFile), true); - assertEquals(net.params(), pair.getFirst().params()); + assertEquals(net.getModelParams(), pair.getFirst().getModelParams()); assertNotNull(pair.getSecond()); } @@ -395,7 +395,7 @@ public class ModelSerializerTest extends BaseDL4JTest { //Also test reading both model and normalizer from stream (correctly) Pair pair = ModelSerializer.restoreComputationGraphAndNormalizer(new FileInputStream(tempFile), true); - assertEquals(net.params(), pair.getFirst().params()); + assertEquals(net.getModelParams(), pair.getFirst().getModelParams()); assertNotNull(pair.getSecond()); } @@ -496,6 +496,6 @@ public class ModelSerializerTest extends BaseDL4JTest { assertTrue(entries.contains("otherData.bin")); ComputationGraph restoredNet = ModelSerializer.restoreComputationGraph(tempFile); - assertEquals(net.params(), restoredNet.params()); + assertEquals(net.getModelParams(), restoredNet.getModelParams()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayer.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayer.java index 11bb40d58..86ebdf3ea 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayer.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayer.java @@ -21,7 +21,6 @@ package org.deeplearning4j.nn.modelimport.keras.layers; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; @@ -80,10 +79,6 @@ public class TFOpLayer extends LayerConfiguration { public void setNIn(InputType inputType, boolean override){} - @Override - public GradientNormalization getGradientNormalization(){return null;} - - @Override public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, @@ -91,14 +86,11 @@ public class TFOpLayer extends LayerConfiguration { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); TFOpLayerImpl tfOpLayerImpl = new TFOpLayerImpl(nodeDef, constants, lconf, networkDataType); - tfOpLayerImpl.setListeners(trainingListeners); + tfOpLayerImpl.addTrainingListeners(trainingListeners); tfOpLayerImpl.setIndex(layerIndex); return tfOpLayerImpl; } - @Override - public double getGradientNormalizationThreshold(){return 0.;} - @Override public List getRegularizationByParam(String paramName){return null;} diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java index 97ceac993..b2e5a15a2 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java +++ 
b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java @@ -31,7 +31,7 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer; -import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; +import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; @@ -448,8 +448,8 @@ public class KerasLSTM extends KerasLayer { FeedForwardLayer ffl; - if(this.layer instanceof BaseWrapperLayer){ - BaseWrapperLayer bwl = (BaseWrapperLayer)this.layer; + if(this.layer instanceof BaseWrapperLayerConfiguration){ + BaseWrapperLayerConfiguration bwl = (BaseWrapperLayerConfiguration)this.layer; ffl = (FeedForwardLayer)bwl.getUnderlying(); } else { ffl = (FeedForwardLayer) this.layer; diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java index 35a1aed01..3c850ecfa 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java @@ -33,7 +33,7 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn; import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer; -import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; +import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; @@ -296,8 +296,8 @@ public class KerasSimpleRnn extends KerasLayer { } FeedForwardLayer ffl; - if(this.layer instanceof BaseWrapperLayer){ - BaseWrapperLayer bwl = (BaseWrapperLayer)this.layer; + if(this.layer instanceof BaseWrapperLayerConfiguration){ + BaseWrapperLayerConfiguration bwl = (BaseWrapperLayerConfiguration)this.layer; ffl = (FeedForwardLayer)bwl.getUnderlying(); } else { ffl = (FeedForwardLayer) this.layer; diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/KerasTestUtils.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/KerasTestUtils.java index 12a00d4f7..0ee7ce776 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/KerasTestUtils.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/KerasTestUtils.java @@ -20,7 +20,7 @@ package org.deeplearning4j.nn.modelimport.keras; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import 
org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; import org.nd4j.linalg.learning.regularization.L1Regularization; import org.nd4j.linalg.learning.regularization.L2Regularization; @@ -34,7 +34,7 @@ public class KerasTestUtils { private KerasTestUtils(){ } - public static double getL1(BaseLayer layer) { + public static double getL1(BaseLayerConfiguration layer) { List l = layer.getRegularization(); return getL1(l); } @@ -49,7 +49,7 @@ public class KerasTestUtils { return l1Reg.getL1().valueAt(0,0); } - public static double getL2(BaseLayer layer) { + public static double getL2(BaseLayerConfiguration layer) { List l = layer.getRegularization(); return getL2(l); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java index 2fea0bb82..1dad7c549 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java @@ -286,7 +286,7 @@ public class KerasModelEndToEndTest extends BaseDL4JTest { MultiLayerNetwork net = importEndModelTest(modelPath, inputsOutputPath, true, true, true, true); Layer outLayer = net.getOutputLayer(); assertTrue(outLayer instanceof org.deeplearning4j.nn.layers.LossLayer); - LossLayer llConf = (LossLayer) outLayer.getConfig(); + LossLayer llConf = (LossLayer) outLayer.getTrainingConfig(); assertEquals(new LossSparseMCXENT(), llConf.getLossFn()); } @@ -656,7 +656,7 @@ public class KerasModelEndToEndTest extends BaseDL4JTest { MultiLayerNetwork net = importEndModelTest(modelPath, inputsOutputPath, true, true, true, true, false, null, null); Layer l = net.getLayer(0); - Convolution1DLayer c1d = (Convolution1DLayer) l.getConfig(); + Convolution1DLayer c1d = (Convolution1DLayer) l.getTrainingConfig(); assertEquals(ConvolutionMode.Causal, c1d.getConvolutionMode()); } } diff --git a/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java b/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java index 49237b20c..bd89639b3 100644 --- a/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java +++ b/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java @@ -209,6 +209,6 @@ public class Word2VecTestsSmall extends BaseDL4JTest { final MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); assertEquals(net.getNetConfiguration(), restored.getNetConfiguration()); - assertTrue(net.params().equalsWithEps(restored.params(), 2e-3)); + assertTrue(net.getModelParams().equalsWithEps(restored.getModelParams(), 2e-3)); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java index e0f5d856b..1462661bb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java @@ -23,41 +23,5 @@ package net.brutex.ai.dnn.api; public interface ILayerConfiguration { - /** - * Create and return an instance of a ILayerConfiguration. 
- * - * @param network the "holding" network for the instance - * @return the new layer instance - */ - ILayer instantiate(IModel network); - - - /** - * Defines the valid input type for this ILayer - * - * @return InputType - */ - org.deeplearning4j.nn.conf.inputs.InputType.Type getInputType(); - - - /** - * Defines the valid input type for this ILayer - * - * @return InputType - */ - org.deeplearning4j.nn.conf.inputs.InputType.Type getOutputType(); - - - /** - * Number of trainable parameter in this layer - * @return number of parameter - */ - long numParameters(); - - /** - * An implementation should provide a method to validate the network - * @return true if no errors found; false otherwise - */ - boolean isValid(); - + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java index 9f81fd3d8..3f84a7004 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java @@ -24,6 +24,7 @@ package net.brutex.ai.dnn.api; import java.util.Collection; import java.util.Map; import lombok.NonNull; +import org.deeplearning4j.nn.api.ITrainableLayer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; @@ -44,16 +45,17 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; * {@link #getNetConfiguration()} methods. **/ -public interface IModel { +public interface IModel extends ITrainableLayer { /** - * The param table + * The full param table for the model. Each layer may get a subset of its parameters. * - * @return + * @return full table of parameters */ - Map getParamTable(); + Map getParamTable(boolean backpropOnly); + void setParamTable(Map paramTable); /** * This method returns updater state (if applicable), null otherwise @@ -113,6 +115,11 @@ public interface IModel { */ T[] doEvaluation(MultiDataSetIterator iterator, T... evaluations); + /** + * Get the configuration of this model. + * + * @return the neural net configuration + */ NeuralNetConfiguration getNetConfiguration(); void setNetConfiguration(@NonNull NeuralNetConfiguration netConfiguration); @@ -124,6 +131,7 @@ public interface IModel { /** * Get the number of parameters in this model + * * @return number of parameters */ long numParams(); @@ -148,11 +156,12 @@ public interface IModel { /** - * The score for the model + * The score for the model. No calculation occurs, this simply returns the score calculated before + * by the {@link #computeGradientAndScore(LayerWorkspaceMgr)} method. * * @return the score for the model */ - double score(); + double getScore(); /** @@ -165,7 +174,7 @@ public interface IModel { * * @return the parameters of the model */ - INDArray params(); + INDArray getModelParams(); /** @@ -243,15 +252,16 @@ public interface IModel { /** * Get a parameter array for a given parameter type key + * * @param param the key of the parameter * @return ndarray of parameters */ INDArray getParam(String param); - /** * Set the parameters for a given parameter type. 
+ * * @param key the param type key to set * @param val the new parameters ndarray */ @@ -273,20 +283,19 @@ public interface IModel { /** * Get the TrainingListeners + * * @return training listener */ - Collection getListeners(); + Collection getTrainingListeners(); /** * Replace the TrainingListeners for this model + * * @param listeners new listeners */ - void setListeners(TrainingListener... listeners); - void setListeners(Collection listeners); + void addTrainingListeners(TrainingListener... listeners); + + void addTrainingListeners(Collection listeners); + - /** - * Add TrainingListeners to the model - * @param listener listener to add - */ - void addListeners(TrainingListener... listener); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java index b317e4ab0..02ae2d45f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java @@ -29,6 +29,12 @@ public interface INeuralNetworkConfiguration extends Serializable, Cloneable { INeuralNetworkConfiguration clone(); void init(); + + /** + * The model (if initiated) + * @return + */ + IModel getNet(); } /** /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java index 3e13e811a..8d6f778d0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java @@ -23,6 +23,7 @@ package net.brutex.ai.dnn.api; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; +import org.deeplearning4j.nn.conf.layers.DenseLayer; /** * A fluent API to configure and create artificial neural networks diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/Layer_Descriptions.md b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/Layer_Descriptions.md new file mode 100644 index 000000000..74343c891 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/Layer_Descriptions.md @@ -0,0 +1,31 @@ +# Layer Descriptions # + +## abstract LayerConfiguration and Interface TrainingConfig ## + +Every layer configuration is inherited from LayerConfiguration (and some also from TrainableLayerConfiguration) + + +### NoParamLayer ### + +The following are examples of No ParamLayers. No parameter layers are not inheriting from BaseConfigurationLayer, +but directly from LayerConfiguration. 
+ +* ActivationLayer +* SubsamplingLayer +* ZeroPadding1DLayer +* MaskLayer +* CroppingLayer +* GlobalPoolingLayer + +### SameDiffLayer ### + +### BaseWrapperLayer ### + +### FrozenLayer ### + +### LocalResponseNormalization ### + +### Bidirectional ### + +### TFOpLayer ### + diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java index 2b900a5ff..aa0465659 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java @@ -21,6 +21,8 @@ package net.brutex.ai.dnn.networks; +import java.util.Arrays; +import java.util.HashMap; import java.util.Map; import lombok.Getter; import lombok.NonNull; @@ -58,6 +60,62 @@ public abstract class ArtificialNeuralNetwork implements IModel { @NonNull private NeuralNetConfiguration netConfiguration; + @Getter + @Setter + private Map paramTable; + + /** + * Table of parameters by key, for backprop. For many models (dense layers, etc) - all parameters + * are backprop parameters + * + * @param backpropParamsOnly If true, return backprop params only. If false: return all params + * (equivalent to paramsTable()) + */ + @Override + public Map getParamTable(boolean backpropParamsOnly) { + return paramTable; + } + + + /** + * Set the parameters of the network. Note that the parameter keys must match the format as + * described in {@link #getParam(String)} and {@link #getParamTable()}. Note that the values of the + * parameters used as an argument to this method are copied - i.e., it is safe to later + * modify/reuse the values in the provided paramTable without this impacting the network. + * + * @param paramTable Parameters to set + */ + @Override + public void setParamTable(Map paramTable) { + Map currParamTable = getParamTable(); + if(currParamTable == null) { + currParamTable = paramTable; + } else if (!currParamTable.keySet().equals(paramTable.keySet())) { + throw new IllegalArgumentException( + "Cannot set param table: parameter keys do not match.\n" + "Current: " + + currParamTable.keySet() + "\nTo set: " + paramTable.keySet()); + } + + for (String s : paramTable.keySet()) { + INDArray curr = currParamTable.get(s); + INDArray toSet = paramTable.get(s); + if (!Arrays.equals(curr.shape(), toSet.shape())) { + throw new IllegalArgumentException( + "Cannot set parameter table: parameter \"" + s + "\" shapes " + + "do not match. 
Current = " + Arrays.toString(curr.shape()) + ", to set = " + + Arrays.toString(toSet.shape())); + } + } + + //Now that we've checked ALL params (to avoid leaving net in half-modified state) + for (String s : paramTable.keySet()) { + INDArray curr = currParamTable.get(s); + INDArray toSet = paramTable.get(s); + curr.assign(toSet); + } + } + + /** * Create a new network from configuration diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java index db65ca7bb..770512e4d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java @@ -168,12 +168,12 @@ public abstract class BaseEarlyStoppingTrainer implements IEar if(pretrain){ //TODO support for non-first-layer pretraining if(model instanceof MultiLayerNetwork){ - lastScore = (((MultiLayerNetwork) model).getLayer(0)).score(); + lastScore = (((MultiLayerNetwork) model).getLayer(0)).getScore(); } else { - lastScore = (((ComputationGraph) model).getLayer(0)).score(); + lastScore = (((ComputationGraph) model).getLayer(0)).getScore(); } } else { - lastScore = model.score(); + lastScore = model.getScore(); } for (IterationTerminationCondition c : esConfig.getIterationTerminationConditions()) { if (c.terminate(lastScore)) { @@ -341,11 +341,11 @@ public abstract class BaseEarlyStoppingTrainer implements IEar Collection listeners; if(model instanceof MultiLayerNetwork){ MultiLayerNetwork n = ((MultiLayerNetwork) model); - listeners = n.getListeners(); + listeners = n.getTrainingListeners(); n.setEpochCount(epochNum); } else if(model instanceof ComputationGraph){ ComputationGraph cg = ((ComputationGraph) model); - listeners = cg.getListeners(); + listeners = cg.getTrainingListeners(); cg.getComputationGraphConfiguration().setEpochCount(epochNum); } else { return; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java index d106f827f..0cccc2a4f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java @@ -23,6 +23,7 @@ package org.deeplearning4j.gradientcheck; import lombok.*; import lombok.experimental.Accessors; import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.exception.ND4JArraySizeException; @@ -32,10 +33,8 @@ import org.nd4j.common.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.api.layers.IOutputLayer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.graph.LayerVertex; -import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.BaseOutputLayer; @@ -83,12 +82,12 @@ public class GradientCheckUtil { IActivation afn = null; if(outputLayer 
instanceof BaseOutputLayer){ BaseOutputLayer o = (BaseOutputLayer)outputLayer; - lfn = ((org.deeplearning4j.nn.conf.layers.BaseOutputLayer)o.layerConf()).getLossFn(); + lfn = ((org.deeplearning4j.nn.conf.layers.BaseOutputLayer)o.getTypedLayerConfiguration()).getLossFn(); afn = o.getLayerConfiguration().getActivationFn(); } else if(outputLayer instanceof LossLayer){ LossLayer o = (LossLayer) outputLayer; - lfn = o.layerConf().getLossFn(); - afn = o.layerConf().getActivationFn(); + lfn = o.getTypedLayerConfiguration().getLossFn(); + afn = o.getTypedLayerConfiguration().getActivationFn(); } if (lfn instanceof LossMCXENT && afn instanceof ActivationSoftmax && ((LossMCXENT) lfn).getSoftmaxClipEps() != 0) { @@ -211,17 +210,17 @@ public class GradientCheckUtil { + "is: " + netDataType + "). Double precision must be used for gradient checks. Create network with .dataType(DataType.DOUBLE) before using GradientCheckUtil"); } - if(netDataType != c.net.params().dataType()){ + if(netDataType != c.net.getModelParams().dataType()){ throw new IllegalStateException("Parameters datatype does not match network configuration datatype (" - + "is: " + c.net.params().dataType() + "). If network datatype is set to DOUBLE, parameters must also be DOUBLE."); + + "is: " + c.net.getModelParams().dataType() + "). If network datatype is set to DOUBLE, parameters must also be DOUBLE."); } //Check network configuration: int layerCount = 0; for (LayerConfiguration n : c.net.getNetConfiguration().getFlattenedLayerConfigurations()) { - if (n instanceof BaseLayer) { - BaseLayer bl = (BaseLayer) n; + if (n instanceof BaseLayerConfiguration) { + BaseLayerConfiguration bl = (BaseLayerConfiguration) n; IUpdater u = bl.getIUpdater(); if (u instanceof Sgd) { //Must have LR of 1.0 @@ -274,7 +273,7 @@ public class GradientCheckUtil { updater.update(c.net, gradAndScore.getFirst(), 0, 0, c.net.batchSize(), LayerWorkspaceMgr.noWorkspaces()); INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done) - INDArray originalParams = c.net.params().dup(); //need dup: params are a *view* of full parameters + INDArray originalParams = c.net.getModelParams().dup(); //need dup: params are a *view* of full parameters val nParams = originalParams.length(); @@ -323,7 +322,7 @@ public class GradientCheckUtil { log.info("NOTE: parameters will be skipped due to config: {}", c.excludeParams); } - INDArray params = c.net.params(); //Assumption here: params is a view that we can modify in-place + INDArray params = c.net.getModelParams(); //Assumption here: params is a view that we can modify in-place for (long i = 0; i < nParams; ) { //Get param name if (i >= paramEnds[currParamNameIdx]) { @@ -438,9 +437,9 @@ public class GradientCheckUtil { + "is: " + netDataType + "). Double precision must be used for gradient checks. Create network with .dataType(DataType.DOUBLE) before using GradientCheckUtil"); } - if(netDataType != c.net.params().dataType()){ + if(netDataType != c.net.getModelParams().dataType()){ throw new IllegalStateException("Parameters datatype does not match network configuration datatype (" - + "is: " + c.net.params().dataType() + "). If network datatype is set to DOUBLE, parameters must also be DOUBLE."); + + "is: " + c.net.getModelParams().dataType() + "). 
If network datatype is set to DOUBLE, parameters must also be DOUBLE."); } //Check configuration @@ -451,8 +450,8 @@ public class GradientCheckUtil { continue; LayerVertex lv = (LayerVertex) gv; - if (lv.getLayerConfiguration() instanceof BaseLayer) { - BaseLayer bl = (BaseLayer) lv.getLayerConfiguration(); + if (lv.getLayerConfiguration() instanceof BaseLayerConfiguration) { + BaseLayerConfiguration bl = (BaseLayerConfiguration) lv.getLayerConfiguration(); IUpdater u = bl.getIUpdater(); if (u instanceof Sgd) { //Must have LR of 1.0 @@ -510,7 +509,7 @@ public class GradientCheckUtil { updater.update(gradAndScore.getFirst(), 0, 0, c.net.batchSize(), LayerWorkspaceMgr.noWorkspaces()); INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done) - INDArray originalParams = c.net.params().dup(); //need dup: params are a *view* of full parameters + INDArray originalParams = c.net.getModelParams().dup(); //need dup: params are a *view* of full parameters val nParams = originalParams.length(); @@ -530,7 +529,7 @@ public class GradientCheckUtil { int totalNFailures = 0; double maxError = 0.0; MultiDataSet mds = new MultiDataSet(c.inputs, c.labels, c.inputMask, c.labelMask); - INDArray params = c.net.params(); //Assumption here: params is a view that we can modify in-place + INDArray params = c.net.getModelParams(); //Assumption here: params is a view that we can modify in-place for (long i = 0; i < nParams; i++) { //Get param name if (i >= paramEnds[currParamNameIdx]) { @@ -643,7 +642,7 @@ public class GradientCheckUtil { updater.update(layer, gradAndScore.getFirst(), 0, 0, layer.batchSize(), LayerWorkspaceMgr.noWorkspaces()); INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done) - INDArray originalParams = layer.params().dup(); //need dup: params are a *view* of full parameters + INDArray originalParams = layer.getParams().dup(); //need dup: params are a *view* of full parameters val nParams = originalParams.length(); @@ -660,7 +659,7 @@ public class GradientCheckUtil { double maxError = 0.0; int currParamNameIdx = 0; - INDArray params = layer.params(); //Assumption here: params is a view that we can modify in-place + INDArray params = layer.getParams(); //Assumption here: params is a view that we can modify in-place for (int i = 0; i < nParams; i++) { //Get param name if (i >= paramEnds[currParamNameIdx]) { @@ -675,13 +674,13 @@ public class GradientCheckUtil { //TODO add a 'score' method that doesn't calculate gradients... 
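Editor's note: GradientCheckUtil's preconditions above boil down to two guards: the flattened parameter vector must be double precision, and any per-layer SGD updater must use learning rate 1.0 so the raw gradient is what gets compared. A hedged sketch of those guards, written only against the accessors visible in this diff (getModelParams(), getNetConfiguration(), getFlattenedLayerConfigurations(), getIUpdater()):

import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.learning.config.IUpdater;
import org.nd4j.linalg.learning.config.Sgd;

public class GradientCheckPreconditions {
    // Sketch: fail fast before running a gradient check.
    public static void check(MultiLayerNetwork net) {
        if (net.getModelParams().dataType() != DataType.DOUBLE) {
            throw new IllegalStateException(
                    "Gradient checks need DOUBLE parameters; build the net with .dataType(DataType.DOUBLE)");
        }
        for (LayerConfiguration lc : net.getNetConfiguration().getFlattenedLayerConfigurations()) {
            if (lc instanceof BaseLayerConfiguration) {
                IUpdater u = ((BaseLayerConfiguration) lc).getIUpdater();
                if (u instanceof Sgd && ((Sgd) u).getLearningRate() != 1.0) {
                    throw new IllegalStateException(
                            "Layer " + lc.getLayerName() + " must use Sgd(1.0) for gradient checks");
                }
            }
        }
    }
}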
Nd4j.getRandom().setSeed(rngSeed); layer.computeGradientAndScore(mgr); - double scorePlus = layer.score(); + double scorePlus = layer.getScore(); //(w-epsilon): Do forward pass and score params.putScalar(i, origValue - epsilon); Nd4j.getRandom().setSeed(rngSeed); layer.computeGradientAndScore(mgr); - double scoreMinus = layer.score(); + double scoreMinus = layer.getScore(); //Reset original param value params.putScalar(i, origValue); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Trainable.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITrainableLayer.java similarity index 90% rename from cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Trainable.java rename to cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITrainableLayer.java index 33f87a736..d9c85d1f3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Trainable.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITrainableLayer.java @@ -20,16 +20,21 @@ package org.deeplearning4j.nn.api; +import java.util.Map; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Map; +public interface ITrainableLayer { -public interface Trainable { + + Map getParamTable(); + Map getParamTable(boolean isBackprop); + + void setParamTable(Map paramTable); /** * @return Training configuration */ - TrainingConfig getConfig(); + ITraininableLayerConfiguration getTrainingConfig(); /** * @return Number of parameters @@ -39,14 +44,15 @@ public interface Trainable { /** * @return 1d parameter vector */ - INDArray params(); + INDArray getParams(); /** * The param table * * @return - */ + Map getParamTable(); + */ /** * Table of parameters by key, for backprop. For many models (dense layers, etc) - all parameters @@ -54,16 +60,15 @@ public interface Trainable { * * @param backpropParamsOnly If true, return backprop params only. If false: return all params * (equivalent to paramsTable()) - */ - Map getParamTable(boolean backpropParamsOnly); + Map getParamTable(boolean backpropParamsOnly); +*/ /** * Setter for the param table * * @param paramTable - */ - void setParamTable(Map paramTable); - + void setParamTable(Map paramTable); +*/ /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/TrainingConfig.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITraininableLayerConfiguration.java similarity index 96% rename from cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/TrainingConfig.java rename to cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITraininableLayerConfiguration.java index 58f101260..40a3170b4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/TrainingConfig.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITraininableLayerConfiguration.java @@ -27,7 +27,7 @@ import org.nd4j.linalg.learning.regularization.Regularization; import java.util.List; -public interface TrainingConfig { +public interface ITraininableLayerConfiguration { /** * @return Name of the layer @@ -55,7 +55,7 @@ public interface TrainingConfig { boolean isPretrainParam(String paramName); /** - * Get the updater for the given parameter. Typically the same updater will be used for all updaters, but this + * Get the updater for the given parameter. 
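Editor's note: the scorePlus/scoreMinus loop above is the standard central-difference estimate: perturb one parameter by plus/minus epsilon, rescore, and take (scorePlus - scoreMinus) / (2 * epsilon) as the numerical gradient. A self-contained sketch of the same arithmetic against a plain scalar function:

import java.util.function.DoubleUnaryOperator;

public class CentralDifference {
    // Sketch: numerical derivative of f at x via central differences.
    public static double numericGradient(DoubleUnaryOperator f, double x, double epsilon) {
        double scorePlus = f.applyAsDouble(x + epsilon);
        double scoreMinus = f.applyAsDouble(x - epsilon);
        return (scorePlus - scoreMinus) / (2.0 * epsilon);
    }

    public static void main(String[] args) {
        // d/dx (x^2) at x = 3 is 6; the estimate should agree to roughly 1e-6.
        System.out.println(numericGradient(v -> v * v, 3.0, 1e-6));
    }
}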
Typically the same updater will be used for all parameters, but this * is not necessarily the case * * @param paramName Parameter name @@ -74,5 +74,4 @@ public interface TrainingConfig { double getGradientNormalizationThreshold(); void setDataType(DataType dataType); - } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java index 41051df53..7ff694e99 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java @@ -21,7 +21,7 @@ package org.deeplearning4j.nn.api; -import lombok.NonNull; +import java.util.Map; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -29,12 +29,10 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.LayerHelper; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; import java.io.Serializable; -import java.util.Collection; /** * A layer is the highest-level building block in deep learning. A layer is a container that usually @@ -46,7 +44,7 @@ import java.util.Collection; * * @see NVIDIA Deep Learning In A Nutshell */ -public interface Layer extends Serializable, Cloneable, Trainable, IModel { +public interface Layer extends Serializable, Cloneable, IModel { //IModel /** * Return the configuration of this layer @@ -234,6 +232,11 @@ public interface Layer extends Serializable, Cloneable, Trainable, IModel { LayerHelper getHelper(); + /** + * Get a reference to the network this layer is part of. 
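Editor's note: with Trainable renamed to ITrainableLayer (and TrainingConfig to ITraininableLayerConfiguration), parameter access goes through getParams() and getParamTable(); the old javadoc for the table accessors is now commented out in the interface. A hedged sketch of a helper written only against those accessors; the generic map type is assumed from the call sites elsewhere in this diff.

import java.util.Map;
import org.deeplearning4j.nn.api.ITrainableLayer;
import org.nd4j.linalg.api.ndarray.INDArray;

public class ParamCounter {
    // Sketch: total parameter count of a layer, derived from its param table.
    public static long countParams(ITrainableLayer layer) {
        Map<String, INDArray> table = layer.getParamTable();
        if (table == null) {
            return 0L;   // e.g. parameter-free layers
        }
        long count = 0L;
        for (INDArray arr : table.values()) {
            count += arr.length();
        }
        return count;
    }
}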
+ * @return + */ + IModel getNet(); enum Type { FEED_FORWARD, RECURRENT, CONVOLUTIONAL, CONVOLUTIONAL3D, SUBSAMPLING, UPSAMPLING, RECURSIVE, MULTILAYER, NORMALIZATION diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java index 2c01298cb..ae301a40c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java @@ -40,7 +40,7 @@ public interface Updater extends Serializable { * @param viewArray View array * @param initialize Whether to initialize the array or not */ - void setStateViewArray(Trainable layer, INDArray viewArray, boolean initialize); + void setStateViewArray(ITrainableLayer layer, INDArray viewArray, boolean initialize); /** * @return the view array for this updater @@ -54,5 +54,5 @@ public interface Updater extends Serializable { * @param gradient * @param iteration */ - void update(Trainable layer, Gradient gradient, int iteration, int epoch, int miniBatchSize, LayerWorkspaceMgr workspaceMgr); + void update(ITrainableLayer layer, Gradient gradient, int iteration, int epoch, int miniBatchSize, LayerWorkspaceMgr workspaceMgr); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java index afba61743..e5e94ef3c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java @@ -27,7 +27,7 @@ import org.deeplearning4j.nn.conf.graph.LayerVertex; import org.deeplearning4j.nn.conf.graph.MergeVertex; import org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; @@ -209,7 +209,8 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { if (lv.getNetConfiguration() != null && lv.getLayerConfiguration() != null) { LayerConfiguration layer = lv.getLayerConfiguration(); - if (layer instanceof BaseLayer && ((BaseLayer) layer).getActivationFn() == null) { + if (layer instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration) layer).getActivationFn() == null) { String layerName = layer.getLayerName(); try { @@ -235,7 +236,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { if (activationFunction != null) { IActivation ia = Activation.fromString(activationFunction.asText()).getActivationFunction(); - ((BaseLayer) layer).setActivationFn(ia); + ((BaseLayerConfiguration) layer).setActivationFn(ia); } } catch (IOException e) { @@ -257,7 +258,8 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @return True if all is well and layer iteration shall continue. False else-wise. 
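Editor's note: the Updater interface above now takes ITrainableLayer in both setStateViewArray(...) and update(...); call sites only change the static type of the first argument. A hedged sketch of the migrated call shape, with the updater, layer and gradient assumed to be supplied by the caller:

import org.deeplearning4j.nn.api.ITrainableLayer;
import org.deeplearning4j.nn.api.Updater;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;

public class UpdaterCallSite {
    // Sketch: same six-argument update(...) as before, but the first parameter
    // is now typed as ITrainableLayer instead of the old Trainable interface.
    public static void applyUpdate(Updater updater, ITrainableLayer layer, Gradient gradient,
                                   int iteration, int epoch, int miniBatchSize) {
        updater.update(layer, gradient, iteration, epoch, miniBatchSize,
                LayerWorkspaceMgr.noWorkspaces());
    }
}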
*/ private static void handleLegacyWeightInitFromJson(String json, LayerConfiguration layer, ObjectMapper mapper, JsonNode vertices) { - if (layer instanceof BaseLayer && ((BaseLayer) layer).getWeightInitFn() == null) { + if (layer instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration) layer).getWeightInitFn() == null) { String layerName = layer.getLayerName(); try { @@ -289,7 +291,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { if (weightInit != null) { final IWeightInit wi = WeightInit.valueOf(weightInit.asText()).getWeightInitFunction(dist); - ((BaseLayer) layer).setWeightInitFn(wi); + ((BaseLayerConfiguration) layer).setWeightInitFn(wi); } } catch (IOException e) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java index 8ff512612..a11c21adc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java @@ -31,13 +31,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; -import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NonNull; import lombok.Setter; -import lombok.Singular; import lombok.experimental.SuperBuilder; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.INeuralNetworkConfiguration; @@ -47,7 +45,7 @@ import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.dropout.IDropout; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.serde.JsonMappers; @@ -520,7 +518,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor private static boolean handleLegacyWeightInitFromJson(String json, LayerConfiguration l, ObjectMapper mapper, JsonNode confs, int layerCount) { - if ((l instanceof BaseLayer) && ((BaseLayer) l).getWeightInitFn() == null) { + if ((l instanceof BaseLayerConfiguration) && ((BaseLayerConfiguration) l).getWeightInitFn() == null) { try { JsonNode jsonNode = mapper.readTree(json); if (confs == null) { @@ -551,7 +549,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor if (weightInit != null) { final IWeightInit wi = WeightInit.valueOf(weightInit.asText()) .getWeightInitFunction(dist); - ((BaseLayer) l).setWeightInitFn(wi); + ((BaseLayerConfiguration) l).setWeightInitFn(wi); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index cb87e885c..ed5a406b4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf; +import com.fasterxml.jackson.annotation.JsonIdentityInfo; +import com.fasterxml.jackson.annotation.JsonIgnore; 
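Editor's note: handleLegacyWeightInitFromJson and its activation counterpart follow one recipe: walk the raw JSON with Jackson, look for the pre-0.7.2 string-valued field, and convert it into the modern enum-backed object (e.g. via WeightInit.valueOf(...).getWeightInitFunction(dist)). A standalone sketch of the field-recovery half of that recipe; the class and method names are illustrative.

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;

public class LegacyFieldReader {
    // Sketch: recover a legacy string-valued "weightInit" field from raw layer JSON.
    // Returns null when the field is absent (i.e. the configuration is already modern).
    public static String legacyWeightInitName(String layerJson, ObjectMapper mapper) {
        try {
            JsonNode node = mapper.readTree(layerJson);
            JsonNode weightInit = node.get("weightInit");
            return weightInit == null ? null : weightInit.asText();
        } catch (IOException e) {
            return null;   // malformed legacy JSON: leave the configuration untouched
        }
    }
}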
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.ObjectIdGenerators; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.exc.InvalidTypeIdException; @@ -41,13 +44,14 @@ import lombok.experimental.SuperBuilder; import lombok.extern.jackson.Jacksonized; import lombok.extern.slf4j.Slf4j; import lombok.val; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.dropout.IDropout; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; import org.deeplearning4j.nn.conf.layers.BaseRecurrentLayer; import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; @@ -65,6 +69,7 @@ import org.deeplearning4j.nn.conf.memory.NetworkMemoryReport; import org.deeplearning4j.nn.conf.serde.JsonMappers; import org.deeplearning4j.nn.conf.stepfunctions.StepFunction; import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; +import org.deeplearning4j.nn.conf.weightnoise.WeightNoise; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.util.OutputLayerUtil; @@ -116,11 +121,15 @@ import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; @EqualsAndHashCode(exclude = {"iterationCount", "epochCount"}) @Jacksonized @JsonIgnoreProperties(ignoreUnknown = true) +@JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") + //The inner builder, that we can then extend ... @SuperBuilder //TODO fix access public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { + private IModel net; private static final int DEFAULT_TBPTT_LENGTH = 20; + private boolean initCalled = false; /** * Set constraints to be applied to all layers. Default: no constraints.
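Editor's note: the new @JsonIdentityInfo annotation (IntSequenceGenerator, property "@id") makes Jackson write repeated or cyclic references as numeric ids instead of re-serializing, or recursing into, the same configuration object. A minimal standalone Jackson sketch of the effect, deliberately unrelated to this project's classes:

import com.fasterxml.jackson.annotation.JsonIdentityInfo;
import com.fasterxml.jackson.annotation.ObjectIdGenerators;
import com.fasterxml.jackson.databind.ObjectMapper;

public class IdentityDemo {

    @JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id")
    public static class Node {
        public String name;
        public Node next;   // may refer back to an earlier Node
    }

    public static void main(String[] args) throws Exception {
        Node a = new Node();
        Node b = new Node();
        a.name = "a";
        b.name = "b";
        a.next = b;
        b.next = a;         // cycle: would overflow the stack without object ids
        // With @JsonIdentityInfo the second reference is written as its "@id",
        // e.g. {"@id":1,"name":"a","next":{"@id":2,"name":"b","next":1}}
        System.out.println(new ObjectMapper().writeValueAsString(a));
    }
}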
@@ -634,7 +643,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { //Also, pre 0.7.2: activation functions were Strings ("activationFunction" field), not classes ("activationFn") //Try to load the old format if necessary, and create the appropriate IActivation instance - if ((l instanceof BaseLayer) && ((BaseLayer) l).getActivationFn() == null) { + if ((l instanceof BaseLayerConfiguration) && ((BaseLayerConfiguration) l).getActivationFn() == null) { try { JsonNode jsonNode = mapper.readTree(json); if (confs == null) { @@ -660,7 +669,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { if (activationFunction != null) { IActivation ia = Activation.fromString(activationFunction.asText()) .getActivationFunction(); - ((BaseLayer) l).setActivationFn(ia); + ((BaseLayerConfiguration) l).setActivationFn(ia); } } @@ -689,7 +698,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { private static boolean handleLegacyWeightInitFromJson(String json, LayerConfiguration l, ObjectMapper mapper, JsonNode confs, int layerCount) { - if ((l instanceof BaseLayer) && ((BaseLayer) l).getWeightInitFn() == null) { + if ((l instanceof BaseLayerConfiguration) && ((BaseLayerConfiguration) l).getWeightInitFn() == null) { try { JsonNode jsonNode = mapper.readTree(json); if (confs == null) { @@ -720,7 +729,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { if (weightInit != null) { final IWeightInit wi = WeightInit.valueOf(weightInit.asText()) .getWeightInitFunction(dist); - ((BaseLayer) l).setWeightInitFn(wi); + ((BaseLayerConfiguration) l).setWeightInitFn(wi); } } @@ -825,8 +834,39 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { */ @Override public void init() { - getNetConfigurations().stream().forEach( conf -> conf.init()); //call init on all embedded configurations - innerConfigurations.add(0, this); //put this configuration at first place + if(initCalled) return; + initCalled=true; + + /** + * Run init() for each layer + */ + + getNetConfigurations().stream().forEach( conf -> { + conf.init(); //do not call on self + }); //call init on all embedded net configurations + innerConfigurations.add(0, this); //put this configuration at first place + + /** + * Inherit network wide configuration setting to those layer configurations + * that do not have an individual setting (nor a default) + */ + for(LayerConfiguration lconf : this.getFlattenedLayerConfigurations()) { + if(lconf.getActivationFn() == null ) lconf.setActivationFn(this.getActivationFn()); + if(lconf.getIUpdater() == null ) lconf.setIUpdater( this.getIUpdater() ); + if(lconf.getIDropout() == null ) lconf.setIDropout( this.getIdropOut() ); + if(lconf.getWeightNoise() == null ) lconf.setWeightNoise( this.getWeightNoise()); + + // ... maybe more to set here ... + if(lconf instanceof BaseLayerConfiguration ) { // then we can set some additional config settings + BaseLayerConfiguration bconf = (BaseLayerConfiguration) lconf; + if(bconf.getBiasUpdater() == null) bconf.setBiasUpdater(this.getBiasUpdater()); + if(bconf.getGradientNormalization() == null) bconf.setGradientNormalization(this.getGradientNormalization()); + // ... maybe more to set here ... 
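Editor's note: the inheritance loop in init() above applies one rule per setting: keep the layer's own value if present, otherwise fall back to the network-wide default. A tiny generic sketch of that rule (the helper name is illustrative):

public class ConfigFallback {
    // Sketch: a layer-level setting, if present, always wins over the network-wide default.
    public static <T> T resolve(T layerValue, T networkDefault) {
        return layerValue != null ? layerValue : networkDefault;
    }
    // e.g. lconf.setActivationFn(resolve(lconf.getActivationFn(), netConf.getActivationFn()));
}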
+ } + } + + + getLayerConfigurations().stream().forEach( lconf -> lconf.setNetConfiguration(this)); //set this as net config for all layers (defined in here, not stacked @@ -1009,6 +1049,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { public List netWideVariables() { + return netWideVariables; } @@ -1131,7 +1172,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { * and getFlattenedLayerConfigurations().get(0); * @return */ - @Deprecated + @Deprecated @JsonIgnore public LayerConfiguration getFirstLayer() { log.warn("This getFirstLayer method is an ugly workaround and will be removed."); return getFlattenedLayerConfigurations().get(0); @@ -1155,5 +1196,12 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { } + public IModel getNeuralNet() { + return net; + } + + public void setNeuralNet(IModel model) { + this.net = model; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java index f93c1619b..67f6ee365 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java @@ -27,6 +27,7 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InvalidInputTypeException; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -153,7 +154,8 @@ public class LayerVertex extends GraphVertex { @Override public void setDataType(DataType dataType){ - layerConfiguration.setDataType(dataType); + if(layerConfiguration instanceof BaseLayerConfiguration) + ((BaseLayerConfiguration)layerConfiguration).setDataType(dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java index 378ae01a2..d7ee4b8ef 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java @@ -35,6 +35,7 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.learning.config.IUpdater; import java.util.Collection; import java.util.Map; @@ -74,6 +75,11 @@ public class ActivationLayer extends NoParamLayer { return clone; } + @Override + public IUpdater getIUpdater() { + return null; + } + @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { @@ -81,7 +87,7 @@ public class ActivationLayer extends NoParamLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.ActivationLayer ret = new org.deeplearning4j.nn.layers.ActivationLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + 
ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java index 311359f7f..72615eca8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java @@ -60,7 +60,7 @@ public class AutoEncoder extends BasePretrainNetwork { org.deeplearning4j.nn.layers.feedforward.autoencoder.AutoEncoder ret = new org.deeplearning4j.nn.layers.feedforward.autoencoder.AutoEncoder(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java similarity index 96% rename from cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java rename to cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java index bf30e0f7a..121f9b38f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; @@ -31,6 +32,7 @@ import org.deeplearning4j.nn.weights.WeightInitDistribution; import org.deeplearning4j.util.NetworkUtils; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; +import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.L1Regularization; import org.nd4j.linalg.learning.regularization.L2Regularization; @@ -46,27 +48,26 @@ import java.util.List; */ @Data @EqualsAndHashCode(callSuper = true) -@NoArgsConstructor -public abstract class BaseLayer extends LayerConfiguration implements Serializable, Cloneable { +@NoArgsConstructor(force = true) +public abstract class BaseLayerConfiguration extends LayerConfiguration implements ITraininableLayerConfiguration, Serializable, Cloneable { - protected IActivation activationFn; @NonNull protected IWeightInit weightInitFn; - protected double biasInit; - protected double gainInit; + protected double biasInit = 0.0; + protected double gainInit = 0.0; protected List regularization; protected List regularizationBias; protected IUpdater iUpdater; protected IUpdater biasUpdater; - protected IWeightNoise weightNoise; + private DataType dataType; + protected GradientNormalization gradientNormalization = GradientNormalization.None; //Clipping, rescale based on l2 norm, etc protected double gradientNormalizationThreshold = 1.0; //Threshold for l2 and element-wise gradient clipping - public BaseLayer(Builder builder) { + public 
BaseLayerConfiguration(Builder builder) { super(builder); this.layerName = builder.layerName; - this.activationFn = builder.activationFn; this.weightInitFn = builder.weightInitFn; this.biasInit = builder.biasInit; this.gainInit = builder.gainInit; @@ -77,6 +78,7 @@ public abstract class BaseLayer extends LayerConfiguration implements Serializab this.gradientNormalization = builder.gradientNormalization; this.gradientNormalizationThreshold = builder.gradientNormalizationThreshold; this.weightNoise = builder.weightNoise; + super.setActivationFn(builder.activationFn); } /** @@ -99,8 +101,8 @@ public abstract class BaseLayer extends LayerConfiguration implements Serializab } @Override - public BaseLayer clone() { - BaseLayer clone = (BaseLayer) super.clone(); + public BaseLayerConfiguration clone() { + BaseLayerConfiguration clone = (BaseLayerConfiguration) super.clone(); if (clone.iDropout != null) { clone.iDropout = clone.iDropout.clone(); } @@ -121,7 +123,7 @@ public abstract class BaseLayer extends LayerConfiguration implements Serializab } /** - * Get the updater for the given parameter. Typically the same updater will be used for all updaters, but this is + * Get the updater for the given parameter. Typically the same updater will be used for all parameters, but this is * not necessarily the case * * @param paramName Parameter name @@ -174,13 +176,13 @@ public abstract class BaseLayer extends LayerConfiguration implements Serializab * Bias initialization value, for layers with biases. Defaults to 0 * */ - protected double biasInit = Double.NaN; + protected double biasInit = 0.0; /** * Gain initialization value, for layers with ILayer Normalization. Defaults to 1 * */ - protected double gainInit = Double.NaN; + protected double gainInit = 1.0; /** * Regularization for the parameters (excluding biases). 
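Editor's note: the setListeners(...) to addTrainingListeners(...) swap recurs in every instantiate(...) method above and throughout the rest of this diff. Assuming the new method appends to the layer's existing listener set rather than replacing it (the old name suggested replacement), and that the Collection overload is reachable through the Layer/IModel hierarchy as these call sites suggest, the registration step can be factored as below:

import java.util.Collection;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.optimize.api.TrainingListener;

public class ListenerSetup {
    // Sketch: register training listeners on a freshly instantiated layer.
    public static void attach(Layer layer, Collection<TrainingListener> listeners) {
        if (listeners != null && !listeners.isEmpty()) {
            layer.addTrainingListeners(listeners);
        }
    }
}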
@@ -504,7 +506,6 @@ public abstract class BaseLayer extends LayerConfiguration implements Serializab this.setWeightNoise(weightNoise); return (T) this; } - - } -} + +} \ No newline at end of file diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java index 68e3a0851..ab0044448 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java @@ -97,7 +97,7 @@ public class BatchNormalization extends FeedForwardLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.normalization.BatchNormalization ret = new org.deeplearning4j.nn.layers.normalization.BatchNormalization(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java index a25a10947..afe3fcc48 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java @@ -66,7 +66,7 @@ public class CenterLossOutputLayer extends BaseOutputLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); Layer ret = new org.deeplearning4j.nn.layers.training.CenterLossOutputLayer(lconf, networkDataType); - ret.setListeners(trainingListeners.toArray(new TrainingListener[]{})); + ret.addTrainingListeners(trainingListeners.toArray(new TrainingListener[]{})); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java index 5c3cede7e..79782d956 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java @@ -60,7 +60,7 @@ public class Cnn3DLossLayer extends FeedForwardLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer ret = new org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java index bcad7fb65..b4f93482d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java @@ -65,7 +65,7 @@ public class CnnLossLayer extends FeedForwardLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.convolution.CnnLossLayer ret = new org.deeplearning4j.nn.layers.convolution.CnnLossLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java index eeb023374..cf4fb5a1a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java @@ -69,7 +69,7 @@ public class Convolution1DLayer extends ConvolutionLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.convolution.Convolution1DLayer ret = new org.deeplearning4j.nn.layers.convolution.Convolution1DLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java index 28a03ed4e..99992463a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java @@ -100,7 +100,7 @@ public class Convolution3D extends ConvolutionLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); Convolution3DLayer ret = new Convolution3DLayer(lconf, networkDataType); - ret.setListeners(iterationListeners); + ret.addTrainingListeners(iterationListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java index a09d33506..25ad6ba4b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java @@ -177,7 +177,7 @@ public class ConvolutionLayer extends FeedForwardLayer { org.deeplearning4j.nn.layers.convolution.ConvolutionLayer ret = new org.deeplearning4j.nn.layers.convolution.ConvolutionLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java index d5b113b7f..d805561d0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java @@ -88,7 +88,7 @@ public class Deconvolution2D extends ConvolutionLayer { org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer ret = new org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java index 99ed3137b..ea19c1148 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java @@ -85,7 +85,7 @@ public class Deconvolution3D extends ConvolutionLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); Deconvolution3DLayer ret = new Deconvolution3DLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java index fce42e8e5..bfd88a62d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java @@ -62,12 +62,13 @@ public class DenseLayer extends FeedForwardLayer { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("DenseLayerConfiguration", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret = new org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer(lconf, networkDataType); if(getWeightInitFn() == null) setWeightInitFn(new WeightInitXavier()); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java index 52eb89ecf..307604ce0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java @@ -71,7 +71,7 @@ public class DepthwiseConvolution2D extends ConvolutionLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); DepthwiseConvolution2DLayer 
ret = new DepthwiseConvolution2DLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java index 573b6c617..521dacd23 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java @@ -74,7 +74,7 @@ public class DropoutLayer extends FeedForwardLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.DropoutLayer ret = new org.deeplearning4j.nn.layers.DropoutLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java index 3ef26352b..36d719ddc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java @@ -61,7 +61,7 @@ public class EmbeddingLayer extends FeedForwardLayer { org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer ret = new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java index 133b0b6c1..2ec7b654c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java @@ -69,7 +69,7 @@ public class EmbeddingSequenceLayer extends FeedForwardLayer { org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingSequenceLayer ret = new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingSequenceLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java index 3728e55bb..de733add8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java @@ -28,13 +28,12 @@ import 
org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.preprocessor.Cnn3DToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor; -import org.deeplearning4j.nn.params.DefaultParamInitializer; @Data @NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -public abstract class FeedForwardLayer extends BaseLayer { +public abstract class FeedForwardLayer extends BaseLayerConfiguration { protected long nIn; protected long nOut; @@ -123,7 +122,7 @@ public abstract class FeedForwardLayer extends BaseLayer { @Getter @Setter - public abstract static class Builder> extends BaseLayer.Builder { + public abstract static class Builder> extends BaseLayerConfiguration.Builder { /** * Number of inputs for the layer (usually the size of the last layer).
Note that for Convolutional layers, diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java index 1cd9e6c91..6d95ae93b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java @@ -73,7 +73,7 @@ public class GlobalPoolingLayer extends NoParamLayer { org.deeplearning4j.nn.layers.pooling.GlobalPoolingLayer ret = new org.deeplearning4j.nn.layers.pooling.GlobalPoolingLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java index ac6242e9a..792d735c3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java @@ -83,7 +83,7 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.recurrent.GravesBidirectionalLSTM ret = new org.deeplearning4j.nn.layers.recurrent.GravesBidirectionalLSTM(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java index bb84cedae..1cdd16dba 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java @@ -81,7 +81,7 @@ public class GravesLSTM extends AbstractLSTM { org.deeplearning4j.nn.layers.recurrent.GravesLSTM ret = new org.deeplearning4j.nn.layers.recurrent.GravesLSTM(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java index 8474d3089..85c440c18 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java @@ -77,7 +77,7 @@ public class LSTM extends AbstractLSTM { LayerValidation.assertNInNOutSet("LSTM", getLayerName(), layerIndex, getNIn(), getNOut()); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.recurrent.LSTM ret = new org.deeplearning4j.nn.layers.recurrent.LSTM(lconf, networkDataType); - 
ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java index bb98be57d..b0131b80d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java @@ -20,7 +20,9 @@ package org.deeplearning4j.nn.conf.layers; +import com.fasterxml.jackson.annotation.JsonIdentityInfo; import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.annotation.ObjectIdGenerators; import java.io.Serializable; import java.lang.reflect.Field; import java.util.ArrayList; @@ -33,9 +35,10 @@ import lombok.Getter; import lombok.NoArgsConstructor; import lombok.NonNull; import lombok.Setter; +import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.ILayerConfiguration; import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.api.TrainingConfig; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -43,6 +46,7 @@ import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.dropout.IDropout; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; @@ -58,17 +62,18 @@ import org.nd4j.linalg.learning.regularization.Regularization; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") @Data @NoArgsConstructor +@JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") +@Slf4j +public abstract class LayerConfiguration implements ILayerConfiguration, Serializable, Cloneable { // ITraininableLayerConfiguration -public abstract class LayerConfiguration implements TrainingConfig, Serializable, Cloneable { - - protected String layerName; + protected String layerName = "noname"; @Getter protected List variables = new ArrayList<>(); public void addVariable(String s) {variables.add(s);} protected IDropout iDropout; protected List constraints; - + protected IWeightNoise weightNoise; /** * The type of the layer, basically defines the base class and its properties */ @@ -247,7 +252,7 @@ public abstract class LayerConfiguration implements TrainingConfig, Serializable public abstract boolean isPretrainParam(String paramName); /** - * Get the updater for the given parameter. Typically the same updater will be used for all + * Get the updater for the given parameter. 
Typically, the same updater will be used for all * parameters, but this is not necessarily the case * * @param paramName Parameter name @@ -258,12 +263,13 @@ public abstract class LayerConfiguration implements TrainingConfig, Serializable "Not supported: all layers with parameters should override this method"); } - @Getter - private IUpdater iUpdater; - @Override - public void setDataType(DataType dataType) { - //No-op for most layers + public IUpdater getIUpdater() { + throw new UnsupportedOperationException( + "Not supported: all layers with parameters should override this method"); + } + public void setIUpdater(IUpdater iUpdater) { + log.warn("Setting an IUpdater on {} with name {} has no effect.", getClass().getSimpleName(), getLayerName()); } /** @@ -279,15 +285,15 @@ public abstract class LayerConfiguration implements TrainingConfig, Serializable this.variables.clear(); } - @Getter - public IActivation activationFn; + @Getter @Setter + private IActivation activationFn; @SuppressWarnings("unchecked") @Getter @Setter public abstract static class Builder> { - protected String layerName = null; + protected String layerName = "noname"; protected List allParamConstraints; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java index a125d4ffc..fcde1b127 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java @@ -79,11 +79,11 @@ public class LayerValidation { List weightConstraints, List biasConstraints) { if (layer != null) { - if (layer instanceof BaseLayer) { - BaseLayer bLayer = (BaseLayer) layer; + if (layer instanceof BaseLayerConfiguration) { + BaseLayerConfiguration bLayer = (BaseLayerConfiguration) layer; configureBaseLayer(layerName, bLayer, iDropout, regularization, regularizationBias); - } else if (layer instanceof FrozenLayer && ((FrozenLayer) layer).getInnerConfiguration() instanceof BaseLayer) { - BaseLayer bLayer = (BaseLayer) ((FrozenLayer) layer).getInnerConfiguration(); + } else if (layer instanceof FrozenLayer && ((FrozenLayer) layer).getInnerConfiguration() instanceof BaseLayerConfiguration) { + BaseLayerConfiguration bLayer = (BaseLayerConfiguration) ((FrozenLayer) layer).getInnerConfiguration(); configureBaseLayer(layerName, bLayer, iDropout, regularization, regularizationBias); } else if (layer instanceof Bidirectional) { Bidirectional l = (Bidirectional) layer; @@ -128,7 +128,7 @@ public class LayerValidation { } } - private static void configureBaseLayer(String layerName, BaseLayer bLayer, IDropout iDropout, + private static void configureBaseLayer(String layerName, BaseLayerConfiguration bLayer, IDropout iDropout, List regularization, List regularizationBias) { if (regularization != null && !regularization.isEmpty()) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java index 77483640c..75397400b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; import 
org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -79,7 +78,7 @@ public class LocalResponseNormalization extends LayerConfiguration { org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization ret = new org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); @@ -130,16 +129,6 @@ public class LocalResponseNormalization extends LayerConfiguration { return false; //No params in LRN } - @Override - public GradientNormalization getGradientNormalization() { - return GradientNormalization.None; - } - - @Override - public double getGradientNormalizationThreshold() { - return 0; - } - @Override public LayerMemoryReport getMemoryReport(InputType inputType) { val actElementsPerEx = inputType.arrayElementsPerExample(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java index 2e89f7ee7..226d3255d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java @@ -60,7 +60,7 @@ public class LossLayer extends FeedForwardLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.LossLayer ret = new org.deeplearning4j.nn.layers.LossLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java index 7d0c181f8..57a58f42c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java @@ -24,8 +24,10 @@ import lombok.NoArgsConstructor; import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.GradientNormalization; +import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.params.EmptyParamInitializer; +import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; import java.util.List; @@ -55,18 +57,17 @@ public abstract class NoParamLayer extends LayerConfiguration { return null; } - @Override - public GradientNormalization getGradientNormalization() { - return GradientNormalization.None; - } - - @Override - public double getGradientNormalizationThreshold() { - return 0; - } - @Override public boolean isPretrainParam(String paramName) { throw new UnsupportedOperationException(getClass().getSimpleName() + " does not contain parameters"); } + +/** +* + * 
@return a default no-op updater (Updater.NONE), since this layer type has no trainable parameters to update +*/ + @Override + public IUpdater getIUpdater() { + return Updater.NONE.getIUpdaterWithDefaultConfig(); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java index 2616ed8d9..f024caec2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java @@ -56,7 +56,7 @@ public class OutputLayer extends BaseOutputLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.OutputLayer ret = new org.deeplearning4j.nn.layers.OutputLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java index e44f7f709..50647d0f1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java @@ -41,7 +41,7 @@ import java.util.Map; @NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -public class PReLULayer extends BaseLayer { +public class PReLULayer extends BaseLayerConfiguration { private long[] inputShape = null; private long[] sharedAxes = null; @@ -61,7 +61,7 @@ public class PReLULayer extends BaseLayer { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.feedforward.PReLU ret = new org.deeplearning4j.nn.layers.feedforward.PReLU(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java index 1127d0be0..4742b9e5b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java @@ -64,7 +64,7 @@ public class RnnLossLayer extends FeedForwardLayer { org.deeplearning4j.nn.layers.recurrent.RnnLossLayer ret = new org.deeplearning4j.nn.layers.recurrent.RnnLossLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java index 629e70da6..5b59c5399 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java @@ -63,7 +63,7 @@ public class RnnOutputLayer extends BaseOutputLayer { org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer ret = new org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java index 34bc03086..924c4cc2a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java @@ -121,7 +121,7 @@ public class SeparableConvolution2D extends ConvolutionLayer { org.deeplearning4j.nn.layers.convolution.SeparableConvolution2DLayer ret = new org.deeplearning4j.nn.layers.convolution.SeparableConvolution2DLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java index ff4082075..50f91781b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java @@ -71,7 +71,7 @@ public class SpaceToBatchLayer extends NoParamLayer { org.deeplearning4j.nn.layers.convolution.SpaceToBatch ret = new org.deeplearning4j.nn.layers.convolution.SpaceToBatch(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java index 110d127b0..462f3ab5e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java @@ -77,7 +77,7 @@ public class SpaceToDepthLayer extends NoParamLayer { org.deeplearning4j.nn.layers.convolution.SpaceToDepth ret = new org.deeplearning4j.nn.layers.convolution.SpaceToDepth(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java index 5d48dfa6b..be544fb2f 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java @@ -65,7 +65,7 @@ public class Subsampling1DLayer extends SubsamplingLayer { org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling1DLayer ret = new org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling1DLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java index d201c88b2..123df419b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java @@ -117,7 +117,7 @@ public class Subsampling3DLayer extends NoParamLayer { org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling3DLayer ret = new org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling3DLayer(lconf, networkDataType); - ret.setListeners(iterationListeners); + ret.addTrainingListeners(iterationListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java index 32983b01c..bddd9fc30 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java @@ -138,7 +138,7 @@ public class SubsamplingLayer extends NoParamLayer { org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer ret = new org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java index 6f7a7c091..a2d3c4fb8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java @@ -61,7 +61,7 @@ public class Upsampling1D extends BaseUpsamplingLayer { org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling1D ret = new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling1D(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java index 61693091a..48e86c848 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java @@ -67,7 +67,7 @@ public class Upsampling2D extends BaseUpsamplingLayer { org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling2D ret = new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling2D(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java index f4d5fa280..4d629e2fd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java @@ -67,7 +67,7 @@ public class Upsampling3D extends BaseUpsamplingLayer { new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling3D(lconf, networkDataType); - ret.setListeners(iterationListeners); + ret.addTrainingListeners(iterationListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java index aa0268be1..43f6e4ed1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java @@ -70,7 +70,7 @@ public class ZeroPadding1DLayer extends NoParamLayer { org.deeplearning4j.nn.layers.convolution.ZeroPadding1DLayer ret = new org.deeplearning4j.nn.layers.convolution.ZeroPadding1DLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java index 21d77ae03..cdabe2788 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java @@ -57,7 +57,7 @@ public class ZeroPadding3DLayer extends NoParamLayer { org.deeplearning4j.nn.layers.convolution.ZeroPadding3DLayer ret = new org.deeplearning4j.nn.layers.convolution.ZeroPadding3DLayer(lconf, networkDataType); - ret.setListeners(iterationListeners); + ret.addTrainingListeners(iterationListeners); ret.setIndex(layerIndex); Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java index 0d0e85d56..4582f42c5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java @@ -74,7 +74,7 @@ public class ZeroPaddingLayer extends NoParamLayer { org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer ret = new org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java index 2124e9eb9..ef3cedabe 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java @@ -79,7 +79,7 @@ public class Cropping1D extends NoParamLayer { setNetConfiguration(conf); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); Cropping1DLayer ret = new Cropping1DLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java index 604a269cb..d73d33950 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java @@ -95,7 +95,7 @@ public class Cropping2D extends NoParamLayer { setNetConfiguration(conf); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); Cropping2DLayer ret = new Cropping2DLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java index c22c8f429..a950ed633 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java @@ -87,7 +87,7 @@ public class Cropping3D extends NoParamLayer { setNetConfiguration(conf); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); Cropping3DLayer ret = new Cropping3DLayer(lconf, networkDataType); - ret.setListeners(iterationListeners); + ret.addTrainingListeners(iterationListeners); ret.setIndex(layerIndex); Map paramTable = initializer().init(this, layerParamsView, initializeParams); 
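Illustrative sketch (not part of the patch): every hunk in this region applies the same call-site rename from setListeners(...) to addTrainingListeners(...). The snippet below shows how calling code attaches a listener through the renamed API; ScoreIterationListener is a stock DL4J listener chosen only for illustration, graphConf is a placeholder for an already-built ComputationGraphConfiguration, and the addTrainingListeners method is assumed to exist as introduced by this patch.

    import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
    import org.deeplearning4j.nn.graph.ComputationGraph;
    import org.deeplearning4j.optimize.listeners.ScoreIterationListener;

    class ListenerMigrationSketch {
        // Build a network and attach a training listener via the renamed API.
        static ComputationGraph buildWithListeners(ComputationGraphConfiguration graphConf) {
            ComputationGraph net = new ComputationGraph(graphConf);
            net.init();
            // Formerly net.setListeners(...); this patch switches call sites to addTrainingListeners(...).
            net.addTrainingListeners(new ScoreIterationListener(1));
            return net;
        }
    }
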
ret.setParamTable(paramTable); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java index dc7e9b93d..703d95cea 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java @@ -68,7 +68,7 @@ public class ElementWiseMultiplicationLayer extends org.deeplearning4j.nn.conf.l org.deeplearning4j.nn.layers.feedforward.elementwise.ElementWiseMultiplicationLayer ret = new org.deeplearning4j.nn.layers.feedforward.elementwise.ElementWiseMultiplicationLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java index 35a4cae8d..eb15350dc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java @@ -125,16 +125,6 @@ public class FrozenLayer extends LayerConfiguration { return null; } - @Override - public GradientNormalization getGradientNormalization() { - return innerConfiguration.getGradientNormalization(); - } - - @Override - public double getGradientNormalizationThreshold() { - return innerConfiguration.getGradientNormalizationThreshold(); - } - @Override public LayerMemoryReport getMemoryReport(InputType inputType) { return innerConfiguration.getMemoryReport(inputType); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java index ae438958f..6abf467d3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java @@ -26,7 +26,7 @@ import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; +import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayerConfiguration; import org.deeplearning4j.nn.params.FrozenLayerWithBackpropParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; @@ -40,11 +40,15 @@ import java.util.List; @Data @EqualsAndHashCode(callSuper = false) -public class FrozenLayerWithBackprop extends BaseWrapperLayer { +public class FrozenLayerWithBackprop extends BaseWrapperLayerConfiguration { + /** + * Create a new Frozen Layer, that wraps another layer with backpropagation enabled. 
+ * + * @param layer configuration of the layer to wrap + */ public FrozenLayerWithBackprop(@JsonProperty("layer") LayerConfiguration layer) { super(layer); - underlying = layer; } public NeuralNetConfiguration getInnerConf(NeuralNetConfiguration conf) { @@ -66,9 +70,10 @@ public class FrozenLayerWithBackprop extends BaseWrapperLayer { boolean initializeParams, DataType networkDataType) { //Need to be able to instantiate a layer, from a config - for JSON -> net type situations - org.deeplearning4j.nn.api.Layer underlying = getUnderlying().instantiate(conf, trainingListeners, + org.deeplearning4j.nn.api.Layer newUnderlyingLayer = underlying.instantiate(conf, trainingListeners, layerIndex, layerParamsView, initializeParams, networkDataType); + newUnderlyingLayer.setLayerConfiguration(underlying); //Fix a problem, where the embedded layer gets the conf of the frozen layer, rather than its own NeuralNetConfiguration nncUnderlying = underlying.getNetConfiguration(); if (nncUnderlying.netWideVariables() != null) { @@ -81,7 +86,7 @@ public class FrozenLayerWithBackprop extends BaseWrapperLayer { } } - return new org.deeplearning4j.nn.layers.FrozenLayerWithBackprop(underlying); + return new org.deeplearning4j.nn.layers.FrozenLayerWithBackprop(newUnderlyingLayer); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java index ba85f879c..541c26914 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java @@ -69,7 +69,7 @@ public class RepeatVector extends FeedForwardLayer { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.RepeatVector ret = new org.deeplearning4j.nn.layers.RepeatVector(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java index d2d4bec81..70bd048e6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java @@ -27,7 +27,6 @@ import lombok.Setter; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -84,7 +83,7 @@ public class Yolo2OutputLayer extends LayerConfiguration { org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer ret = new org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, 
initializeParams); @@ -135,17 +134,6 @@ public class Yolo2OutputLayer extends LayerConfiguration { public boolean isPretrainParam(String paramName) { return false; //No params } - - @Override - public GradientNormalization getGradientNormalization() { - return GradientNormalization.None; - } - - @Override - public double getGradientNormalizationThreshold() { - return 1.0; - } - @Override public LayerMemoryReport getMemoryReport(InputType inputType) { long numValues = inputType.arrayElementsPerExample(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java index 5eda741e4..573492f3a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java @@ -30,7 +30,7 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.BaseRecurrentLayer; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; +import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayerConfiguration; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.recurrent.BidirectionalLayer; import org.deeplearning4j.nn.params.BidirectionalParamInitializer; @@ -93,7 +93,7 @@ public class Bidirectional extends LayerConfiguration { */ public Bidirectional(@NonNull Mode mode, @NonNull LayerConfiguration layer) { if (!(layer instanceof BaseRecurrentLayer || layer instanceof LastTimeStep - || layer instanceof BaseWrapperLayer)) { + || layer instanceof BaseWrapperLayerConfiguration)) { throw new IllegalArgumentException("Cannot wrap a non-recurrent layer: " + "config must extend BaseRecurrentLayer or LastTimeStep " + "Got class: " + layer.getClass()); @@ -211,16 +211,6 @@ public class Bidirectional extends LayerConfiguration { return fwd.getUpdaterByParam(sub); } - @Override - public GradientNormalization getGradientNormalization() { - return fwd.getGradientNormalization(); - } - - @Override - public double getGradientNormalizationThreshold() { - return fwd.getGradientNormalizationThreshold(); - } - @Override public void setLayerName(String layerName) { this.layerName = layerName; @@ -254,7 +244,7 @@ public class Bidirectional extends LayerConfiguration { public Builder rnnLayer(LayerConfiguration layer) { if (!(layer instanceof BaseRecurrentLayer || layer instanceof LastTimeStep - || layer instanceof BaseWrapperLayer)) { + || layer instanceof BaseWrapperLayerConfiguration)) { throw new IllegalArgumentException("Cannot wrap a non-recurrent layer: " + "config must extend BaseRecurrentLayer or LastTimeStep " + "Got class: " + layer.getClass()); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java index a869999dc..a5dff218f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java @@ -23,7 +23,7 @@ package org.deeplearning4j.nn.conf.layers.recurrent; import 
org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; +import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayerConfiguration; import org.deeplearning4j.nn.layers.recurrent.LastTimeStepLayer; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; @@ -31,7 +31,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collection; -public class LastTimeStep extends BaseWrapperLayer { +public class LastTimeStep extends BaseWrapperLayerConfiguration { private LastTimeStep() {} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java index bda494c1d..1d4c182aa 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java @@ -60,7 +60,7 @@ public class SimpleRnn extends BaseRecurrentLayer { org.deeplearning4j.nn.layers.recurrent.SimpleRnn ret = new org.deeplearning4j.nn.layers.recurrent.SimpleRnn(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java index 7ab6370b7..73cddbf14 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java @@ -28,7 +28,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; +import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayerConfiguration; import org.deeplearning4j.nn.layers.recurrent.TimeDistributedLayer; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; @@ -39,7 +39,7 @@ import java.util.Collection; @Data @EqualsAndHashCode(callSuper = true) -public class TimeDistributed extends BaseWrapperLayer { +public class TimeDistributed extends BaseWrapperLayerConfiguration { private RNNFormat rnnDataFormat = RNNFormat.NCW; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java index cfec8d653..e9bded983 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java @@ -23,7 +23,7 @@ package org.deeplearning4j.nn.conf.layers.samediff; import lombok.Data; import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.MaskState; -import 
org.deeplearning4j.nn.api.TrainingConfig; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.GraphVertex; @@ -46,7 +46,7 @@ import java.util.Map; @Data @EqualsAndHashCode(callSuper = false) -public abstract class SameDiffVertex extends GraphVertex implements TrainingConfig { +public abstract class SameDiffVertex extends GraphVertex implements ITraininableLayerConfiguration { private SDVertexParams vertexParams; private String name; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java index 7f11874e8..18f9cadc1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java @@ -25,7 +25,7 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; +import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayerConfiguration; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; @@ -36,7 +36,7 @@ import java.util.Collection; @Data @EqualsAndHashCode(callSuper = false) -public class MaskZeroLayer extends BaseWrapperLayer { +public class MaskZeroLayer extends BaseWrapperLayerConfiguration { private double maskingValue = 0.0; @@ -61,7 +61,7 @@ public class MaskZeroLayer extends BaseWrapperLayer { boolean initializeParams, DataType networkDataType) { NeuralNetConfiguration conf2 = conf.clone(); - conf2.setLayer(((BaseWrapperLayer) this).getUnderlying()); + conf2.setLayer(((BaseWrapperLayerConfiguration) this).getUnderlying()); org.deeplearning4j.nn.api.Layer underlyingLayer = underlying.instantiate(conf2, trainingListeners, layerIndex, layerParamsView, initializeParams, networkDataType); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java index 4e6a0c41c..85f06a40b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java @@ -73,7 +73,7 @@ public class VariationalAutoencoder extends BasePretrainNetwork { org.deeplearning4j.nn.layers.variational.VariationalAutoencoder ret = new org.deeplearning4j.nn.layers.variational.VariationalAutoencoder(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayer.java deleted file mode 100644 index 
2495fbd56..000000000 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayer.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * ****************************************************************************** - * * - * * - * * This program and the accompanying materials are made available under the - * * terms of the Apache License, Version 2.0 which is available at - * * https://www.apache.org/licenses/LICENSE-2.0. - * * - * * See the NOTICE file distributed with this work for additional - * * information regarding copyright ownership. - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * * License for the specific language governing permissions and limitations - * * under the License. - * * - * * SPDX-License-Identifier: Apache-2.0 - * ***************************************************************************** - */ - -package org.deeplearning4j.nn.conf.layers.wrapper; - -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.NonNull; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; -import org.deeplearning4j.nn.params.WrapperLayerParamInitializer; -import org.nd4j.linalg.learning.regularization.Regularization; - -import java.util.List; - -@Data -@EqualsAndHashCode(callSuper = false) -public abstract class BaseWrapperLayer extends LayerConfiguration { - - /** - * Set the net configuration for this configuration as well as for the underlying layer - * (if not null there) - * - * @param netConfiguration the neural net configuration - */ - @Override - public void setNetConfiguration(NeuralNetConfiguration netConfiguration) { - super.setNetConfiguration(netConfiguration); - if(getUnderlying().getNetConfiguration() == null) { - getUnderlying().setNetConfiguration( - netConfiguration); //also set netconf for underlying if not set - } - } - - protected LayerConfiguration underlying; - - protected BaseWrapperLayer(Builder builder) { - super(builder); - } - - protected BaseWrapperLayer() {} - - public BaseWrapperLayer(LayerConfiguration underlying) { - this.underlying = underlying; - this.setNetConfiguration(underlying.getNetConfiguration()); - } - - @Override - public ParamInitializer initializer() { - return WrapperLayerParamInitializer.getInstance(); - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - return underlying.getOutputType(layerIndex, inputType); - } - - @Override - public void setNIn(InputType inputType, boolean override) { - underlying.setNIn(inputType, override); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return underlying.getPreProcessorForInputType(inputType); - } - - @Override - public List getRegularizationByParam(String paramName){ - return underlying.getRegularizationByParam(paramName); - } - - @Override - public GradientNormalization getGradientNormalization() { - return underlying.getGradientNormalization(); - } - - @Override - public double getGradientNormalizationThreshold() { - return 
underlying.getGradientNormalizationThreshold(); - } - - @Override - public boolean isPretrainParam(String paramName) { - return underlying.isPretrainParam(paramName); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - return underlying.getMemoryReport(inputType); - } - - @Override - public void setLayerName(String layerName) { - super.setLayerName(layerName); - if (underlying != null) { - //May be null at some points during JSON deserialization - underlying.setLayerName(layerName); - } - } -} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayerConfiguration.java new file mode 100644 index 000000000..74b71de1f --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayerConfiguration.java @@ -0,0 +1,196 @@ +/* + * ****************************************************************************** + * * + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + */ + +package org.deeplearning4j.nn.conf.layers.wrapper; + +import java.util.List; +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.GradientNormalization; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.dropout.IDropout; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; +import org.deeplearning4j.nn.params.WrapperLayerParamInitializer; +import org.nd4j.linalg.activations.IActivation; +import org.nd4j.linalg.learning.config.IUpdater; +import org.nd4j.linalg.learning.regularization.Regularization; + +@Data +@EqualsAndHashCode(callSuper = false) +public abstract class BaseWrapperLayerConfiguration extends LayerConfiguration { + + /** + * The configuration to of another layer to wrap + */ + protected LayerConfiguration underlying; + + protected BaseWrapperLayerConfiguration(Builder builder) { + super(builder); + } + + protected BaseWrapperLayerConfiguration() { + } + + public BaseWrapperLayerConfiguration(LayerConfiguration underlying) { + this.underlying = underlying; + this.setNetConfiguration(underlying.getNetConfiguration()); + } + + /** + * Set the net configuration for this configuration as well as for the underlying layer (if not + * null there) + * + * @param netConfiguration the neural net configuration + */ + @Override + public void setNetConfiguration(NeuralNetConfiguration 
netConfiguration) { + super.setNetConfiguration(netConfiguration); + if (underlying.getNetConfiguration() == null) { + underlying.setNetConfiguration( + netConfiguration); //also set netconf for underlying if not set + } + } + + /** + * @return the activation function of the underlying (wrapped) layer configuration + */ + @Override + public IActivation getActivationFn() { + return underlying.getActivationFn(); + } + + /** + * @return the dropout configuration of the underlying layer + */ + @Override + public IDropout getIDropout() { + return underlying.getIDropout(); + } + + /** + * @param activationFn activation function to set on the underlying layer + */ + @Override + public void setActivationFn(IActivation activationFn) { + underlying.setActivationFn(activationFn); + } + + /** + * @param iDropout dropout configuration to set on the underlying layer + */ + @Override + public void setIDropout(IDropout iDropout) { + underlying.setIDropout(iDropout); + } + + /** + * @param weightNoise weight noise to set on the underlying layer + */ + @Override + public void setWeightNoise(IWeightNoise weightNoise) { + underlying.setWeightNoise(weightNoise); + } + + /** + * @param s name of the variable to add to the underlying layer + */ + @Override + public void addVariable(String s) { + underlying.addVariable(s); + } + + /** + * Get the updater for the given parameter. Typically, the same updater will be used for all + * parameters, but this is not necessarily the case + * + * @param paramName Parameter name + * @return IUpdater for the parameter + */ + @Override + public IUpdater getUpdaterByParam(String paramName) { + return underlying.getUpdaterByParam(paramName); + } + + /** + * @param iUpdater updater to set on the underlying layer + */ + @Override + public void setIUpdater(IUpdater iUpdater) { + underlying.setIUpdater(iUpdater); + } + + /** + * @return the updater of the underlying layer + */ + @Override + public IUpdater getIUpdater() { + return underlying.getIUpdater(); + } + + @Override + public ParamInitializer initializer() { + return WrapperLayerParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + return underlying.getOutputType(layerIndex, inputType); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + underlying.setNIn(inputType, override); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return underlying.getPreProcessorForInputType(inputType); + } + + @Override + public List getRegularizationByParam(String paramName) { + return underlying.getRegularizationByParam(paramName); + } + + @Override + public boolean isPretrainParam(String paramName) { + return underlying.isPretrainParam(paramName); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return underlying.getMemoryReport(inputType); + } + + @Override + public void setLayerName(String layerName) { + super.setLayerName(layerName); + if (underlying != null) { + //May be null at some points during JSON deserialization + underlying.setLayerName(layerName); + } + } + +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java index 8cc5e6e20..c2e149c25 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java @@ -21,7 +21,7 @@ package org.deeplearning4j.nn.conf.misc; import lombok.AllArgsConstructor; -import org.deeplearning4j.nn.api.TrainingConfig; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.conf.GradientNormalization; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.learning.config.IUpdater; @@ -31,7 +31,7 @@ import 
org.nd4j.linalg.learning.regularization.Regularization; import java.util.List; @AllArgsConstructor -public class DummyConfig implements TrainingConfig { +public class DummyConfig implements ITraininableLayerConfiguration { private final String name; @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java index 8469c6f62..34a888303 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java @@ -73,7 +73,7 @@ public class OCNNOutputLayer extends BaseOutputLayer { super(builder); this.hiddenSize = builder.hiddenLayerSize; this.nu = builder.nu; - this.activationFn = builder.activation; + setActivationFn( builder.activation) ; this.windowSize = builder.windowSize; this.initialRValue = builder.initialRValue; this.configureR = builder.configureR; @@ -88,7 +88,7 @@ public class OCNNOutputLayer extends BaseOutputLayer { @JsonProperty("configureR") boolean configureR) { this.hiddenSize = hiddenSize; this.nu = nu; - this.activationFn = activation; + setActivationFn( activation); this.windowSize = windowSize; this.initialRValue = initialRValue; this.configureR = configureR; @@ -107,13 +107,13 @@ public class OCNNOutputLayer extends BaseOutputLayer { org.deeplearning4j.nn.layers.ocnn.OCNNOutputLayer ret = new org.deeplearning4j.nn.layers.ocnn.OCNNOutputLayer(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); ret.setLayerConfiguration(lconf); - ret.setActivation(activationFn); + ret.setActivation(getActivationFn()); if (lastEpochSinceRUpdated == 0 && configureR) { paramTable.get(OCNNParamInitializer.R_KEY).putScalar(0, initialRValue); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java index c6a2cbb26..292b85c10 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java @@ -24,7 +24,7 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.*; @@ -66,8 +66,8 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im protected boolean requiresIUpdaterFromLegacy(LayerConfiguration[] layers){ for(LayerConfiguration l : layers){ - if(l instanceof BaseLayer){ - BaseLayer bl = (BaseLayer)l; + if(l instanceof BaseLayerConfiguration){ + BaseLayerConfiguration bl = (BaseLayerConfiguration)l; if(bl.getIUpdater() == null && bl.initializer().numParams(bl) > 0){ return true; } @@ -87,7 +87,8 @@ public abstract class BaseNetConfigDeserializer extends 
StdDeserializer im protected boolean requiresRegularizationFromLegacy(LayerConfiguration[] layers){ for(LayerConfiguration l : layers){ - if(l instanceof BaseLayer && ((BaseLayer)l).getRegularization() == null){ + if(l instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration)l).getRegularization() == null){ return true; } } @@ -96,7 +97,8 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im protected boolean requiresWeightInitFromLegacy(LayerConfiguration[] layers){ for(LayerConfiguration l : layers){ - if(l instanceof BaseLayer && ((BaseLayer)l).getWeightInitFn() == null){ + if(l instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration)l).getWeightInitFn() == null){ return true; } } @@ -105,7 +107,8 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im protected boolean requiresActivationFromLegacy(LayerConfiguration[] layers){ for(LayerConfiguration l : layers){ - if(l instanceof BaseLayer && ((BaseLayer)l).getActivationFn() == null){ + if(l instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration)l).getActivationFn() == null){ return true; } } @@ -121,7 +124,7 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im return false; } - protected void handleUpdaterBackwardCompatibility(BaseLayer layer, ObjectNode on){ + protected void handleUpdaterBackwardCompatibility(BaseLayerConfiguration layer, ObjectNode on){ if(on != null && on.has("updater")){ String updaterName = on.get("updater").asText(); if(updaterName != null){ @@ -202,42 +205,43 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im } } - protected void handleL1L2BackwardCompatibility(BaseLayer baseLayer, ObjectNode on){ + protected void handleL1L2BackwardCompatibility(BaseLayerConfiguration baseLayerConfiguration, ObjectNode on){ if(on != null && (on.has("l1") || on.has("l2"))){ //Legacy format JSON - baseLayer.setRegularization(new ArrayList()); - baseLayer.setRegularizationBias(new ArrayList()); + baseLayerConfiguration.setRegularization(new ArrayList()); + baseLayerConfiguration.setRegularizationBias(new ArrayList()); if(on.has("l1")){ double l1 = on.get("l1").doubleValue(); if(l1 > 0.0){ - baseLayer.getRegularization().add(new L1Regularization(l1)); + baseLayerConfiguration.getRegularization().add(new L1Regularization(l1)); } } if(on.has("l2")){ double l2 = on.get("l2").doubleValue(); if(l2 > 0.0){ //Default to non-LR based WeightDecay, to match behaviour in 1.0.0-beta3 - baseLayer.getRegularization().add(new WeightDecay(l2, false)); + baseLayerConfiguration.getRegularization().add(new WeightDecay(l2, false)); } } if(on.has("l1Bias")){ double l1Bias = on.get("l1Bias").doubleValue(); if(l1Bias > 0.0){ - baseLayer.getRegularizationBias().add(new L1Regularization(l1Bias)); + baseLayerConfiguration.getRegularizationBias().add(new L1Regularization(l1Bias)); } } if(on.has("l2Bias")){ double l2Bias = on.get("l2Bias").doubleValue(); if(l2Bias > 0.0){ //Default to non-LR based WeightDecay, to match behaviour in 1.0.0-beta3 - baseLayer.getRegularizationBias().add(new WeightDecay(l2Bias, false)); + baseLayerConfiguration.getRegularizationBias().add(new WeightDecay(l2Bias, false)); } } } } - protected void handleWeightInitBackwardCompatibility(BaseLayer baseLayer, ObjectNode on){ + protected void handleWeightInitBackwardCompatibility( + BaseLayerConfiguration baseLayerConfiguration, ObjectNode on){ if(on != null && on.has("weightInit") ){ //Legacy format JSON if(on.has("weightInit")){ @@ -250,7 +254,7 @@ public abstract 
class BaseNetConfigDeserializer extends StdDeserializer im d = NeuralNetConfiguration.mapper().readValue(dist, Distribution.class); } IWeightInit iwi = w.getWeightInitFunction(d); - baseLayer.setWeightInitFn(iwi); + baseLayerConfiguration.setWeightInitFn(iwi); } catch (Throwable t){ log.warn("Failed to infer weight initialization from legacy JSON format",t); } @@ -259,8 +263,9 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im } //Changed after 0.7.1 from "activationFunction" : "softmax" to "activationFn" : - protected void handleActivationBackwardCompatibility(BaseLayer baseLayer, ObjectNode on){ - if(baseLayer.getActivationFn() == null && on.has("activationFunction")){ + protected void handleActivationBackwardCompatibility( + BaseLayerConfiguration baseLayerConfiguration, ObjectNode on){ + if(baseLayerConfiguration.getActivationFn() == null && on.has("activationFunction")){ String afn = on.get("activationFunction").asText(); IActivation a = null; try { @@ -272,7 +277,7 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im | InvocationTargetException instantiationException){ log.error(instantiationException.getMessage()); } - baseLayer.setActivationFn(a); + baseLayerConfiguration.setActivationFn(a); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java index cf9282771..92399e037 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java @@ -26,7 +26,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.graph.LayerVertex; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; import org.deeplearning4j.nn.conf.layers.BatchNormalization; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -118,20 +118,24 @@ public class ComputationGraphConfigurationDeserializer continue; } - if(attemptIUpdaterFromLegacy && layers[layerIdx] instanceof BaseLayer && ((BaseLayer)layers[layerIdx]).getIUpdater() == null){ - handleUpdaterBackwardCompatibility((BaseLayer)layers[layerIdx], (ObjectNode)next); + if(attemptIUpdaterFromLegacy && layers[layerIdx] instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration)layers[layerIdx]).getIUpdater() == null){ + handleUpdaterBackwardCompatibility((BaseLayerConfiguration)layers[layerIdx], (ObjectNode)next); } - if(requireLegacyRegularizationHandling && layers[layerIdx] instanceof BaseLayer && ((BaseLayer)layers[layerIdx]).getRegularization() == null){ - handleL1L2BackwardCompatibility((BaseLayer)layers[layerIdx], (ObjectNode)next); + if(requireLegacyRegularizationHandling && layers[layerIdx] instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration)layers[layerIdx]).getRegularization() == null){ + handleL1L2BackwardCompatibility((BaseLayerConfiguration)layers[layerIdx], (ObjectNode)next); } - if(requiresLegacyWeightInitHandling && layers[layerIdx] instanceof BaseLayer && ((BaseLayer)layers[layerIdx]).getWeightInitFn() == null){ - 
handleWeightInitBackwardCompatibility((BaseLayer)layers[layerIdx], (ObjectNode)next); + if(requiresLegacyWeightInitHandling && layers[layerIdx] instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration)layers[layerIdx]).getWeightInitFn() == null){ + handleWeightInitBackwardCompatibility((BaseLayerConfiguration)layers[layerIdx], (ObjectNode)next); } - if(requiresLegacyActivationHandling && layers[layerIdx] instanceof BaseLayer && ((BaseLayer)layers[layerIdx]).getActivationFn() == null){ - handleActivationBackwardCompatibility((BaseLayer)layers[layerIdx], (ObjectNode)next); + if(requiresLegacyActivationHandling && layers[layerIdx] instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration)layers[layerIdx]).getActivationFn() == null){ + handleActivationBackwardCompatibility((BaseLayerConfiguration)layers[layerIdx], (ObjectNode)next); } if(requiresLegacyLossHandling && layers[layerIdx] instanceof BaseOutputLayer && ((BaseOutputLayer)layers[layerIdx]).getLossFn() == null){ @@ -144,9 +148,9 @@ public class ComputationGraphConfigurationDeserializer double d = next.get("dropOut").asDouble(); if(!Double.isNaN(d)){ //Might be dropout or dropconnect... - if(layers[layerIdx] instanceof BaseLayer && confNode.has("useDropConnect") + if(layers[layerIdx] instanceof BaseLayerConfiguration && confNode.has("useDropConnect") && confNode.get("useDropConnect").asBoolean(false)){ - ((BaseLayer)layers[layerIdx]).setWeightNoise(new DropConnect(d)); + ((BaseLayerConfiguration)layers[layerIdx]).setWeightNoise(new DropConnect(d)); } else { layers[layerIdx].setIDropout(new Dropout(d)); } @@ -155,11 +159,12 @@ public class ComputationGraphConfigurationDeserializer } layerIdx++; } else if("org.deeplearning4j.nn.conf.graph.LayerVertex".equals(cls)){ - if(requiresLegacyWeightInitHandling && layers[layerIdx] instanceof BaseLayer && ((BaseLayer)layers[layerIdx]).getWeightInitFn() == null) { + if(requiresLegacyWeightInitHandling && layers[layerIdx] instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration)layers[layerIdx]).getWeightInitFn() == null) { //Post JSON format change for subclasses, but before WeightInit was made a class confNode = (ObjectNode) next.get("layerConf"); next = confNode.get("layer"); - handleWeightInitBackwardCompatibility((BaseLayer) layers[layerIdx], (ObjectNode) next); + handleWeightInitBackwardCompatibility((BaseLayerConfiguration) layers[layerIdx], (ObjectNode) next); } layerIdx++; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java index 17a474e78..633650b95 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java @@ -23,7 +23,7 @@ package org.deeplearning4j.nn.conf.serde; import org.apache.commons.io.IOUtils; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.dropout.Dropout; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; import org.deeplearning4j.nn.conf.layers.BatchNormalization; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -86,7 +86,8 @@ public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserialize for( int 
i=0; i (first/only child) -> updater if(on.has("layer")){ confNode = on; @@ -96,7 +97,7 @@ public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserialize } on = (ObjectNode) on.elements().next(); - handleUpdaterBackwardCompatibility((BaseLayer)layers[i], on); + handleUpdaterBackwardCompatibility((BaseLayerConfiguration)layers[i], on); } if(attemptIUpdaterFromLegacy) { @@ -106,9 +107,10 @@ public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserialize double d = on.get("dropOut").asDouble(); if (!Double.isNaN(d)) { //Might be dropout or dropconnect... - if (confNode != null && layers[i] instanceof BaseLayer && confNode.has("useDropConnect") + if (confNode != null && layers[i] instanceof BaseLayerConfiguration + && confNode.has("useDropConnect") && confNode.get("useDropConnect").asBoolean(false)) { - ((BaseLayer) layers[i]).setWeightNoise(new DropConnect(d)); + ((BaseLayerConfiguration) layers[i]).setWeightNoise(new DropConnect(d)); } else { if (d > 0.0) { layers[i].setIDropout(new Dropout(d)); @@ -133,16 +135,19 @@ public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserialize } } - if(requiresLegacyRegularizationHandling && layers[i] instanceof BaseLayer && ((BaseLayer) layers[i]).getRegularization() == null) { - handleL1L2BackwardCompatibility((BaseLayer) layers[i], on); + if(requiresLegacyRegularizationHandling && layers[i] instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration) layers[i]).getRegularization() == null) { + handleL1L2BackwardCompatibility((BaseLayerConfiguration) layers[i], on); } - if(requiresLegacyWeightInitHandling && layers[i] instanceof BaseLayer && ((BaseLayer) layers[i]).getWeightInitFn() == null) { - handleWeightInitBackwardCompatibility((BaseLayer) layers[i], on); + if(requiresLegacyWeightInitHandling && layers[i] instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration) layers[i]).getWeightInitFn() == null) { + handleWeightInitBackwardCompatibility((BaseLayerConfiguration) layers[i], on); } - if(requiresLegacyActivationHandling && layers[i] instanceof BaseLayer && ((BaseLayer)layers[i]).getActivationFn() == null){ - handleActivationBackwardCompatibility((BaseLayer) layers[i], on); + if(requiresLegacyActivationHandling && layers[i] instanceof BaseLayerConfiguration + && ((BaseLayerConfiguration)layers[i]).getActivationFn() == null){ + handleActivationBackwardCompatibility((BaseLayerConfiguration) layers[i], on); } if(requiresLegacyLossHandling && layers[i] instanceof BaseOutputLayer && ((BaseOutputLayer)layers[i]).getLossFn() == null){ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java index 34d9b8c50..cc0c13506 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java @@ -25,7 +25,6 @@ import lombok.NonNull; import lombok.Setter; import lombok.extern.slf4j.Slf4j; import lombok.val; -import net.brutex.ai.dnn.api.IModel; import net.brutex.ai.dnn.networks.ArtificialNeuralNetwork; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; @@ -690,7 +689,8 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali // now we init solver & optimizer if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new 
Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this).build(); + solver = new Solver.Builder().configure(getNetConfiguration()).listeners( + getTrainingListeners()).model(this).build(); solver.initOptimizer(); } } @@ -1159,7 +1159,8 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali } else { if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this).build(); + solver = new Solver.Builder().configure(getNetConfiguration()).listeners( + getTrainingListeners()).model(this).build(); } } @@ -2886,7 +2887,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali @Override public ComputationGraph clone() { ComputationGraph cg = new ComputationGraph(computationGraphConfiguration.clone()); - cg.init(params().dup(), false); + cg.init(getModelParams().dup(), false); if (solver != null) { //If solver is null: updater hasn't been initialized -> getUpdater call will force initialization, however ComputationGraphUpdater u = this.getUpdater(); @@ -2919,12 +2920,12 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali /** * Set the trainingListeners for the ComputationGraph (and all layers in the network) */ - public void setListeners(Collection listeners) { + public void addTrainingListeners(Collection listeners) { if (layers == null) init(); for (Layer l : layers) { - l.setListeners(listeners.toArray(new TrainingListener[]{})); + l.addTrainingListeners(listeners.toArray(new TrainingListener[]{})); } if (solver != null) { @@ -2962,7 +2963,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali /** * Set the trainingListeners for the ComputationGraph (and all layers in the network) */ - public void setListeners(TrainingListener... listeners) { + public void addTrainingListeners(TrainingListener... listeners) { List list = new ArrayList<>(); //Check: user might have done setListeners(null) thinking this would clear the current listeners. //This results in an TrainingListener[1] with a single null value -> results in a NPE later @@ -2972,7 +2973,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali list.add(i); } } - setListeners(list); + addTrainingListeners(list); } /** @@ -2980,26 +2981,11 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali * * @param listeners Listeners to add */ - @Override - public void addListeners(TrainingListener... 
listeners) { - if (this.trainingListeners == null) { - setListeners(listeners); - return; - } else { - List newListeners = new ArrayList<>(this.trainingListeners); //To avoid immutable list issues - Collections.addAll(newListeners, listeners); - setListeners(newListeners); - } - - if (solver != null) { - solver.setListeners(this.trainingListeners); - } - } /** * Get the trainingListeners for the ComputationGraph */ - public Collection getListeners() { + public Collection getTrainingListeners() { return trainingListeners; } @@ -3017,7 +3003,8 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali */ public ComputationGraphUpdater getUpdater(boolean initializeIfAbsent){ if (solver == null && initializeIfAbsent) { - solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this).build(); + solver = new Solver.Builder().configure(getNetConfiguration()).listeners( + getTrainingListeners()).model(this).build(); solver.getOptimizer().setUpdaterComputationGraph(new ComputationGraphUpdater(this)); } if(solver != null) { @@ -3031,7 +3018,8 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali */ public void setUpdater(ComputationGraphUpdater updater) { if (solver == null) { - solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this).build(); + solver = new Solver.Builder().configure(getNetConfiguration()).listeners( + getTrainingListeners()).model(this).build(); } solver.getOptimizer().setUpdaterComputationGraph(updater); } @@ -3048,11 +3036,11 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali } /** - * @deprecated To be removed. Use {@link #params()} + * @deprecated To be removed. Use {@link #getModelParams()} */ @Deprecated public INDArray params(boolean backwardOnly) { - return params(); + return getModelParams(); } /** @@ -3314,7 +3302,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali } @Override - public double score() { + public double getScore() { return score; } @@ -3323,7 +3311,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali } @Override - public INDArray params() { + public INDArray getModelParams() { return flattenedParams; } @@ -3410,7 +3398,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali @Override public Pair gradientAndScore() { - return new Pair<>(gradient(), score()); + return new Pair<>(gradient(), getScore()); } @Override @@ -3743,7 +3731,8 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) + solver = new Solver.Builder().configure(getNetConfiguration()).listeners( + getTrainingListeners()).model(this) .build(); } } @@ -4511,8 +4500,8 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali } ret.append(StringUtils.repeat("-", totalLength)) - .append(String.format("\n%30s %,d", "Total Parameters: ", params().length())) - .append(String.format("\n%30s %,d", "Trainable Parameters: ", params().length() - frozenParams)) + .append(String.format("\n%30s %,d", "Total Parameters: ", getModelParams().length())) + .append(String.format("\n%30s %,d", "ITrainableLayer Parameters: ", getModelParams().length() - frozenParams)) .append(String.format("\n%30s 
%,d", "Frozen Parameters: ", frozenParams)) .append("\n") .append(StringUtils.repeat("=", totalLength)) @@ -4643,12 +4632,12 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali */ public ComputationGraph convertDataType(@NonNull DataType dataType){ Preconditions.checkState(dataType.isFPType(), "Invalid DataType: %s. Can only convert network to a floating point type", dataType); - if(dataType == params().dataType()){ + if(dataType == getModelParams().dataType()){ return this; } try(MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - INDArray newParams = params().castTo(dataType); + INDArray newParams = getModelParams().castTo(dataType); String jsonConfig = this.getComputationGraphConfiguration().toJson(); ComputationGraphConfiguration newConf = ComputationGraphConfiguration.fromJson(jsonConfig); newConf.setDataType(dataType); @@ -4875,7 +4864,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali return false; if (obj instanceof ComputationGraph) { ComputationGraph network = (ComputationGraph) obj; - boolean paramsEquals = network.params().equals(params()); + boolean paramsEquals = network.getModelParams().equals(getModelParams()); boolean confEquals = this.getComputationGraphConfiguration().equals(network.getComputationGraphConfiguration()); boolean updaterEquals = getUpdater().equals(network.getUpdater()); return paramsEquals && confEquals && updaterEquals; @@ -4922,4 +4911,22 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); System.gc(); } + + @Override + public ITraininableLayerConfiguration getTrainingConfig() { + throw new UnsupportedOperationException("Not supported"); + } + + /** + * @return 1d parameter vector + */ + @Override + public INDArray getParams() { + throw new RuntimeException("Not supported"); + } + + @Override + public boolean updaterDivideByMinibatch(String paramName) { + return false; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java index 759f214bc..269e67ac0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java @@ -23,7 +23,7 @@ package org.deeplearning4j.nn.graph.vertex; import lombok.Data; import lombok.Getter; import lombok.Setter; -import org.deeplearning4j.nn.api.TrainingConfig; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.impl.LayerVertex; import org.nd4j.linalg.api.buffer.DataType; @@ -213,16 +213,16 @@ public abstract class BaseGraphVertex implements GraphVertex { @Override public long numParams(){ - return params() == null ? 0 : params().length(); + return getParams() == null ? 
0 : getParams().length(); } @Override - public TrainingConfig getConfig() { + public ITraininableLayerConfiguration getTrainingConfig() { return null; } @Override - public INDArray params() { + public INDArray getParams() { return null; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java index 0d2a3a26d..d73315645 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.graph.vertex; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.TrainingConfig; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.api.ndarray.INDArray; @@ -184,13 +184,13 @@ public abstract class BaseWrapperVertex implements GraphVertex { } @Override - public TrainingConfig getConfig() { - return underlying.getConfig(); + public ITraininableLayerConfiguration getTrainingConfig() { + return underlying.getTrainingConfig(); } @Override - public INDArray params() { - return underlying.params(); + public INDArray getParams() { + return underlying.getParams(); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java index 96ac34c19..51bd7ee62 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java @@ -20,9 +20,9 @@ package org.deeplearning4j.nn.graph.vertex; +import org.deeplearning4j.nn.api.ITrainableLayer; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.Trainable; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; @@ -31,7 +31,7 @@ import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import java.io.Serializable; import java.util.Map; -public interface GraphVertex extends Trainable, Serializable { +public interface GraphVertex extends ITrainableLayer, Serializable { /** Get the name/label of the GraphVertex */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java index c0b5999ac..a3f45121a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java @@ -21,16 +21,13 @@ package org.deeplearning4j.nn.graph.vertex.impl; import java.util.Map; -import lombok.AllArgsConstructor; + import lombok.EqualsAndHashCode; -import org.deeplearning4j.nn.api.TrainingConfig; -import org.deeplearning4j.nn.conf.GradientNormalization; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.conf.misc.DummyConfig; import org.deeplearning4j.nn.graph.vertex.BaseWrapperVertex; import 
org.deeplearning4j.nn.graph.vertex.GraphVertex; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.learning.config.IUpdater; -import org.nd4j.linalg.learning.config.NoOp; @EqualsAndHashCode(callSuper = true, exclude = {"config"}) public class FrozenVertex extends BaseWrapperVertex { @@ -41,7 +38,7 @@ public class FrozenVertex extends BaseWrapperVertex { private transient DummyConfig config; @Override - public TrainingConfig getConfig(){ + public ITraininableLayerConfiguration getTrainingConfig(){ if (config == null) { config = new DummyConfig(getVertexName()); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java index 5f9ebdb35..a0df3e1bb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java @@ -24,7 +24,7 @@ import lombok.Data; import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.TrainingConfig; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.api.layers.RecurrentLayer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -263,13 +263,13 @@ public class LayerVertex extends BaseGraphVertex { } @Override - public TrainingConfig getConfig(){ - return getLayer().getConfig(); + public ITraininableLayerConfiguration getTrainingConfig(){ + return getLayer().getTrainingConfig(); } @Override - public INDArray params(){ - return layer.params(); + public INDArray getParams(){ + return layer.getParams(); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java index edaa3fb80..e8501f312 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java @@ -20,23 +20,15 @@ package org.deeplearning4j.nn.layers; -import java.lang.ref.Cleaner; -import java.lang.ref.PhantomReference; -import java.lang.ref.Reference; -import java.lang.ref.WeakReference; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; -import lombok.AccessLevel; -import lombok.Data; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.NonNull; -import lombok.Setter; +import lombok.*; +import net.brutex.ai.dnn.api.IModel; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.TrainingConfig; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -55,122 +47,71 @@ import org.nd4j.linalg.dataset.api.MultiDataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -/** - * A layer with input and output, no parameters or gradients - */ -@Data -@NoArgsConstructor +/** A layer with input and output, no parameters or gradients */ +@NoArgsConstructor(force = true) public abstract class AbstractLayer 
implements Layer { - @Setter(AccessLevel.NONE) - protected INDArray input; - protected INDArray preOutput; + private final @Getter List variables = new ArrayList<>(); + @Getter - @NonNull - protected LayerConf_T layerConfiguration; + @Setter(AccessLevel.MODULE) + protected INDArray + input; // TODO: this should be private, but too much code is still accessing input directly. + + protected INDArray preOutput; + /** The typed {@link LayerConfiguration}. */ + @Getter @NonNull protected LayerConf_T layerConfiguration; + protected boolean dropoutApplied = false; + @Getter @Setter @NonNull protected Collection trainingListeners = new ArrayList<>(); - @Deprecated public Collection getListeners() {return getTrainingListeners();} - @Deprecated public void setListeners(TrainingListener ... listeners) { setTrainingListeners(List.of(listeners));} - /** - * Set the {@link TrainingListener}s for this model. If any listeners have previously been set, - * they will be replaced by this method - * - * @param listeners - */ - @Deprecated - public void setListeners(Collection listeners) { - setTrainingListeners(listeners); - } - - protected int index = 0; - protected INDArray maskArray; - protected MaskState maskState; + protected @Getter @Setter int index = 0; + protected @Getter @Setter INDArray maskArray; + protected @Getter @Setter MaskState maskState; protected CacheMode cacheMode = CacheMode.NONE; protected boolean inputModificationAllowed = false; protected DataType dataType; - protected int iterationCount; - protected int epochCount; - private List variables = new ArrayList<>(); - public AbstractLayer(LayerConfiguration layerConfiguration, DataType dataType) { - this.layerConfiguration = (LayerConf_T) layerConfiguration; - if (layerConfiguration != null) { + protected @Getter @Setter int iterationCount; + protected @Getter @Setter int epochCount; + private @Getter @Setter IModel net; + + @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration; + + public AbstractLayer(@NonNull LayerConfiguration layerConf, @NonNull DataType dataType) { + //noinspection unchecked + this.layerConfiguration = (LayerConf_T) layerConf; + this.netConfiguration = layerConfiguration.getNetConfiguration(); + + if (layerConfiguration.getNetConfiguration() != null) { cacheMode = layerConfiguration.getNetConfiguration().getCacheMode(); } this.dataType = dataType; + this.net = layerConfiguration.getNetConfiguration().getNet(); + } + + public void addTrainingListeners(TrainingListener... listeners) { + trainingListeners.addAll(List.of(listeners)); + } + + public void addTrainingListeners(Collection listeners) { + trainingListeners.addAll(listeners); } - /** - * @param backpropOnly If true: return only parameters that are not exclusively used for layerwise - * pretraining - * @return Parameter table - */ @Override - public Map getParamTable(boolean backpropOnly) { - return null; - } - - public void setParamTable(Map map) { - throw new RuntimeException("Not implemented."); - } - /** - * @return 1D gradients view array - */ - @Override - public INDArray getGradientsViewArray() { - return null; + public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) { + setInput(input, workspaceMgr); + return activate(training, workspaceMgr); } /** - * Creates and returns a copy of this object. The precise meaning of "copy" may depend on the - * class of the object. The general intent is that, for any object {@code x}, the expression: - *
- *
-   * x.clone() != x
- * will be true, and that the expression:
- *
-   * x.clone().getClass() == x.getClass()
- * will be {@code true}, but these are not absolute requirements. While it is typically the case
- * that:
- *
-   * x.clone().equals(x)
- * will be {@code true}, this is not an absolute requirement.
- *

- * By convention, the returned object should be obtained by calling {@code super.clone}. If a - * class and all of its superclasses (except {@code Object}) obey this convention, it will be the - * case that {@code x.clone().getClass() == x.getClass()}. - *

- * By convention, the object returned by this method should be independent of this object (which - * is being cloned). To achieve this independence, it may be necessary to modify one or more - * fields of the object returned by {@code super.clone} before returning it. Typically, this - * means copying any mutable objects that comprise the internal "deep structure" of the object - * being cloned and replacing the references to these objects with references to the copies. If a - * class contains only primitive fields or references to immutable objects, then it is usually the - * case that no fields in the object returned by {@code super.clone} need to be modified. - *

- * The method {@code clone} for class {@code Object} performs a specific cloning operation. First, - * if the class of this object does not implement the interface {@code Cloneable}, then a - * {@code CloneNotSupportedException} is thrown. Note that all arrays are considered to implement - * the interface {@code Cloneable} and that the return type of the {@code clone} method of an - * array type {@code T[]} is {@code T[]} where T is any reference or primitive type. Otherwise, - * this method creates a new instance of the class of this object and initializes all its fields - * with exactly the contents of the corresponding fields of this object, as if by assignment; the - * contents of the fields are not themselves cloned. Thus, this method performs a "shallow copy" - * of this object, not a "deep copy" operation. - *

- * The class {@code Object} does not itself implement the interface {@code Cloneable}, so calling - * the {@code clone} method on an object whose class is {@code Object} will result in throwing an - * exception at run time. + * Creates and returns a copy of this object. * * @return a clone of this instance. * @throws CloneNotSupportedException if the object's class does not support the {@code Cloneable} - * interface. Subclasses that override the {@code clone} method - * can also throw this exception to indicate that an instance - * cannot be cloned. + * interface. Subclasses that override the {@code clone} method can also throw this exception + * to indicate that an instance cannot be cloned. * @see Cloneable */ @Override @@ -178,83 +119,6 @@ public abstract class AbstractLayer impl return super.clone(); } - /** - * Called by the garbage collector on an object when garbage collection determines that there are - * no more references to the object. A subclass overrides the {@code finalize} method to dispose - * of system resources or to perform other cleanup. - *

- * The general contract of {@code finalize} is that it is invoked if and when the Java™ - * virtual machine has determined that there is no longer any means by which this object can be - * accessed by any thread that has not yet died, except as a result of an action taken by the - * finalization of some other object or class which is ready to be finalized. The {@code finalize} - * method may take any action, including making this object available again to other threads; the - * usual purpose of {@code finalize}, however, is to perform cleanup actions before the object is - * irrevocably discarded. For example, the finalize method for an object that represents an - * input/output connection might perform explicit I/O transactions to break the connection before - * the object is permanently discarded. - *

- * The {@code finalize} method of class {@code Object} performs no special action; it simply - * returns normally. Subclasses of {@code Object} may override this definition. - *

- * The Java programming language does not guarantee which thread will invoke the {@code finalize} - * method for any given object. It is guaranteed, however, that the thread that invokes finalize - * will not be holding any user-visible synchronization locks when finalize is invoked. If an - * uncaught exception is thrown by the finalize method, the exception is ignored and finalization - * of that object terminates. - *

- * After the {@code finalize} method has been invoked for an object, no further action is taken - * until the Java virtual machine has again determined that there is no longer any means by which - * this object can be accessed by any thread that has not yet died, including possible actions by - * other objects or classes which are ready to be finalized, at which point the object may be - * discarded. - *

- * The {@code finalize} method is never invoked more than once by a Java virtual machine for any - * given object. - *

- * Any exception thrown by the {@code finalize} method causes the finalization of this object to - * be halted, but is otherwise ignored. - * - * @throws Throwable the {@code Exception} raised by this method - * @apiNote Classes that embed non-heap resources have many options for cleanup of those - * resources. The class must ensure that the lifetime of each instance is longer than that of any - * resource it embeds. {@link Reference#reachabilityFence} can be used to ensure that objects - * remain reachable while resources embedded in the object are in use. - *

- * A subclass should avoid overriding the {@code finalize} method unless the subclass embeds - * non-heap resources that must be cleaned up before the instance is collected. Finalizer - * invocations are not automatically chained, unlike constructors. If a subclass overrides - * {@code finalize} it must invoke the superclass finalizer explicitly. To guard against - * exceptions prematurely terminating the finalize chain, the subclass should use a - * {@code try-finally} block to ensure {@code super.finalize()} is always invoked. For example, - *

-   * {@code
-   *      @Override
-   *     protected void finalize() throws Throwable {
-   *         try {
-   *             ... // cleanup subclass state
-   *         } finally {
-   *             super.finalize();
-   *         }
-   *     }
-   * }
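Note (illustration only, not part of the patch): the listener changes in this file and in ComputationGraph above collapse the old setListeners(...)/addListeners(...) pair into a single additive addTrainingListeners(...) method. A minimal sketch of how calling code migrates, assuming a network that has already been built and initialized elsewhere; ScoreIterationListener is only a stand-in for any TrainingListener:

import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;

class ListenerMigrationSketch {
    // Hypothetical helper, not part of the patch: attaches a listener via the renamed API.
    static void attachListeners(ComputationGraph net) {
        // Before this patch: net.setListeners(...) replaced all listeners, net.addListeners(...) appended.
        // After this patch there is one additive entry point; a Collection<TrainingListener> overload also exists.
        net.addTrainingListeners(new ScoreIterationListener(10));
    }
}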
- * @jls 12.6 Finalization of Class Instances - * @see WeakReference - * @see PhantomReference - * @deprecated The finalization mechanism is inherently problematic. Finalization can lead to - * performance issues, deadlocks, and hangs. Errors in finalizers can lead to resource leaks; - * there is no way to cancel finalization if it is no longer necessary; and no ordering is - * specified among calls to {@code finalize} methods of different objects. Furthermore, there are - * no guarantees regarding the timing of finalization. The {@code finalize} method might be called - * on a finalizable object only after an indefinite delay, if at all. - *

- * Classes whose instances hold non-heap resources should provide a method to enable explicit - * release of those resources, and they should also implement {@link AutoCloseable} if - * appropriate. The {@link Cleaner} and {@link PhantomReference} provide more flexible and - * efficient ways to release resources when an object becomes unreachable. - */ - @Override - protected void finalize() throws Throwable { - super.finalize(); - } - /** * This method returns updater state (if applicable), null otherwise * @@ -281,9 +145,7 @@ public abstract class AbstractLayer impl * @param dataSet */ @Override - public void fit(DataSet dataSet) { - - } + public void fit(DataSet dataSet) {} /** * This method fits model with a given MultiDataSet @@ -291,9 +153,7 @@ public abstract class AbstractLayer impl * @param dataSet */ @Override - public void fit(MultiDataSet dataSet) { - - } + public void fit(MultiDataSet dataSet) {} /** * This method fits model with a given DataSetIterator @@ -301,9 +161,7 @@ public abstract class AbstractLayer impl * @param iterator */ @Override - public void fit(DataSetIterator iterator) { - - } + public void fit(DataSetIterator iterator) {} /** * This method fits model with a given MultiDataSetIterator @@ -311,9 +169,7 @@ public abstract class AbstractLayer impl * @param iterator */ @Override - public void fit(MultiDataSetIterator iterator) { - - } + public void fit(MultiDataSetIterator iterator) {} /** * This method executes evaluation of the model against given iterator and evaluation @@ -339,31 +195,9 @@ public abstract class AbstractLayer impl return null; } - /** - * @param netConfiguration - */ + /** Init the model */ @Override - public void setNetConfiguration(@NonNull NeuralNetConfiguration netConfiguration) { - - } - - /** - * Init the model - */ - @Override - public void init() { - - } - - /** - * This method ADDS additional TrainingListener to existing listeners - * - * @param listener - */ - @Override - public void addListeners(TrainingListener... listener) { - this.trainingListeners.addAll(List.of(listener)); - } + public void init() {} /** * Update layer weights and biases with gradient change @@ -371,20 +205,16 @@ public abstract class AbstractLayer impl * @param gradient */ @Override - public void update(Gradient gradient) { - - } + public void update(Gradient gradient) {} /** * Perform one update applying the gradient * - * @param gradient the gradient to apply + * @param gradient the gradient to apply * @param paramType */ @Override - public void update(INDArray gradient, String paramType) { - - } + public void update(INDArray gradient, String paramType) {} /** * Update the score @@ -392,9 +222,7 @@ public abstract class AbstractLayer impl * @param workspaceMgr */ @Override - public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) { - - } + public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) {} /** * the number of parameters for the model @@ -407,15 +235,18 @@ public abstract class AbstractLayer impl return 0; } + @Override + public void setParam(String s, INDArray array) {} + /** - * Set the parameters for this model. 
This expects a linear ndarray which then be unpacked - * internally relative to the expected ordering of the model + * Get a parameter array for a given parameter type key * - * @param params the parameters for the model + * @param param the key of the parameter + * @return ndarray of parameters */ @Override - public void setParams(INDArray params) { - + public INDArray getParam(String param) { + return null; } /** @@ -425,21 +256,16 @@ public abstract class AbstractLayer impl * @param params a 1 x nParams row vector that is a view of the larger (MLN/CG) parameters array */ @Override - public void setParamsViewArray(INDArray params) { - - } + public void setParamsViewArray(INDArray params) {} /** * Set the gradients array as a view of the full (backprop) network parameters NOTE: this is * intended to be used internally in MultiLayerNetwork and ComputationGraph, not by users. * - * @param gradients a 1 x nParams row vector that is a view of the larger (MLN/CG) gradients - * array + * @param gradients a 1 x nParams row vector that is a view of the larger (MLN/CG) gradients array */ @Override - public void setBackpropGradientsViewArray(INDArray gradients) { - - } + public void setBackpropGradientsViewArray(INDArray gradients) {} /** * The current inputs batch size @@ -458,78 +284,28 @@ public abstract class AbstractLayer impl */ @Override public INDArray input() { - return null; + return this.input; } - /** - * Get a parameter array for a given parameter type key - * - * @param param the key of the parameter - * @return ndarray of parameters - */ + /** */ @Override - public INDArray getParam(String param) { - return null; - } - - - /** - * The param table - * - * @return - */ - @Override - public Map getParamTable() { - return null; - } - - /** - * Set the parameters for a given parameter type. - * - * @param key the param type key to set - * @param val the new parameters ndarray - */ - @Override - public void setParam(String key, INDArray val) { - - } - - /** - * - */ - @Override - public void close() { - - } + public void close() {} /** * Calculate the gradient relative to the error in the next layer * - * @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where - * C is cost function a=sigma(z) is activation. + * @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where C is + * cost function a=sigma(z) is activation. * @param workspaceMgr Workspace manager * @return Pair where Gradient is gradient for this layer, INDArray is epsilon - * (activation gradient) needed by next layer, but before element-wise multiply by sigmaPrime(z). - * So for standard feed-forward layer, if this layer is L, then return.getSecond() == dL/dIn = - * (w^(L)*(delta^(L))^T)^T. Note that the returned array should be placed in the - * {@link ArrayType#ACTIVATION_GRAD} workspace via the workspace manager + * (activation gradient) needed by next layer, but before element-wise multiply by + * sigmaPrime(z). So for standard feed-forward layer, if this layer is L, then + * return.getSecond() == dL/dIn = (w^(L)*(delta^(L))^T)^T. 
Note that the returned array should + * be placed in the {@link ArrayType#ACTIVATION_GRAD} workspace via the workspace manager */ @Override - public Pair backpropGradient(INDArray epsilon, - LayerWorkspaceMgr workspaceMgr) { - return null; - } - - /** - * Perform forward pass and return the activations array with the last set input - * - * @param training training or test mode - * @param workspaceMgr Workspace manager - * @return the activation (layer output) of the last specified input. Note that the returned array - * should be placed in the {@link ArrayType#ACTIVATIONS} workspace via the workspace manager - */ - @Override - public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { return null; } @@ -543,13 +319,9 @@ public abstract class AbstractLayer impl return false; } - /** - * - */ + /** */ @Override - public void clearNoiseWeightParams() { - - } + public void clearNoiseWeightParams() {} public List variables() { return variables; @@ -562,33 +334,14 @@ public abstract class AbstractLayer impl return variables; } - /** - * The configuration for the neural network - * - * @return the configuration for the neural network - */ - @Override - public NeuralNetConfiguration getNetConfiguration() { - return layerConfiguration.getNetConfiguration(); - } - public void addVariable(String variable) { if (!variables.contains(variable)) { variables.add(variable); } } - /** - * Return the configuration of this layer - * - * @return the configuration - */ - @Override - public LayerConfiguration getLayerConfiguration() { - return layerConf(); - } - public void setLayerConfiguration(LayerConfiguration layerConfiguration) { + //noinspection unchecked this.layerConfiguration = (LayerConf_T) layerConfiguration; } @@ -601,57 +354,39 @@ public abstract class AbstractLayer impl this.cacheMode = mode; } - public LayerConf_T layerConf() { + public LayerConf_T getTypedLayerConfiguration() { return this.layerConfiguration; } @Override - public TrainingConfig getConfig() { - return layerConfiguration; + public ITraininableLayerConfiguration getTrainingConfig() { + return (ITraininableLayerConfiguration) getTypedLayerConfiguration(); } protected String layerId() { String name = this.layerConfiguration.getLayerName(); - return "(layer name: " + (name == null ? "\"\"" : name) + ", layer index: " + index - + ", layer type: " + - getClass().getSimpleName() + ")"; - } - - public INDArray getInput() { - return input; - } - - public int getEpochCount() { - return epochCount; - } - - public void setEpochCount(int epochCount) { - this.epochCount = epochCount; + return "(layer name: " + + (name == null ? 
"\"\"" : name) + + ", layer index: " + + index + + ", layer type: " + + getClass().getSimpleName() + + ")"; } @Override - public void setInput(INDArray input, LayerWorkspaceMgr workspaceMgr) { + public void setInput(@NonNull INDArray input, LayerWorkspaceMgr workspaceMgr) { this.input = workspaceMgr.leverageTo(ArrayType.INPUT, input); dropoutApplied = false; } - @Override - public int getIndex() { - return index; - } - - @Override - public void setIndex(int index) { - this.index = index; - } - /** * Returns the parameters of the neural network as a flattened row vector * * @return the parameters of the neural network */ @Override - public INDArray params() { + public INDArray getModelParams() { return null; } @@ -671,65 +406,60 @@ public abstract class AbstractLayer impl to.muliColumnVector(maskArray.castTo(to.dataType())); } - @Override - public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) { - setInput(input, workspaceMgr); - return activate(training, workspaceMgr); - } - @Override public double calcRegularizationScore(boolean backpropParamsOnly) { return 0.0; } - @Deprecated public void clear() { input = null; maskArray = null; maskState = null; - if (layerConf().getIDropout() != null) { - layerConf().getIDropout().clear(); + if (getTypedLayerConfiguration().getIDropout() != null) { + getTypedLayerConfiguration().getIDropout().clear(); } } protected void applyDropOutIfNecessary(boolean training, LayerWorkspaceMgr workspaceMgr) { - if (training && !dropoutApplied && layerConf().getIDropout() != null) { + if (training && !dropoutApplied && getTypedLayerConfiguration().getIDropout() != null) { INDArray result; if (inputModificationAllowed) { result = input; } else { - result = workspaceMgr.createUninitialized(ArrayType.INPUT, input.dataType(), input.shape(), - input.ordering()); + result = + workspaceMgr.createUninitialized( + ArrayType.INPUT, input.dataType(), input.shape(), input.ordering()); } - input = layerConf().getIDropout() - .applyDropout(input, result, getIterationCount(), getEpochCount(), workspaceMgr); + input = + getTypedLayerConfiguration() + .getIDropout() + .applyDropout(input, result, getIterationCount(), getEpochCount(), workspaceMgr); dropoutApplied = true; } } protected INDArray backpropDropOutIfPresent(INDArray epsilon) { - if (layerConf().getIDropout() != null) { - layerConf().getIDropout().backprop(epsilon, epsilon, getIterationCount(), getEpochCount()); + if (getTypedLayerConfiguration().getIDropout() != null) { + getTypedLayerConfiguration() + .getIDropout() + .backprop(epsilon, epsilon, getIterationCount(), getEpochCount()); } return epsilon; } - @Override public Type type() { return Type.FEED_FORWARD; } - public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { throw new UnsupportedOperationException("Not supported"); } - public Pair gradientAndScore() { - return new Pair<>(gradient(), score()); + return new Pair<>(gradient(), getScore()); } @Override @@ -738,23 +468,13 @@ public abstract class AbstractLayer impl } @Override - public void setInputMiniBatchSize(int size) { - } + public void setInputMiniBatchSize(int size) {} @Override - public INDArray getMaskArray() { - return maskArray; - } - - @Override - public void setMaskArray(INDArray maskArray) { - this.maskArray = maskArray; - } - - @Override - public Pair feedForwardMaskArray(INDArray maskArray, - MaskState currentMaskState, int minibatchSize) { - //Most layers: CNN, dense, activation, etc - set mask array, mask state and then leave the mask unmodified + 
public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + // Most layers: CNN, dense, activation, etc - set mask array, mask state and then leave the mask + // unmodified this.maskArray = maskArray; this.maskState = currentMaskState; @@ -762,28 +482,24 @@ public abstract class AbstractLayer impl return new Pair<>(maskArray, currentMaskState); } - public Gradient gradient() { throw new UnsupportedOperationException( "Not supported for this layer, or should be overridden for layers requiring it"); } - public void fit() { throw new UnsupportedOperationException( "Not supported for this layer, or should be overridden for layers requiring it"); } - - public double score() { + public double getScore() { throw new UnsupportedOperationException( "Not supported for this layer, or should be overridden for layers requiring it"); } - public void applyConstraints(int iteration, int epoch) { - if (layerConf().getConstraints() != null) { - for (LayerConstraint lc : layerConf().getConstraints()) { + if (getTypedLayerConfiguration().getConstraints() != null) { + for (LayerConstraint lc : getTypedLayerConfiguration().getConstraints()) { lc.applyConstraint(this, iteration, epoch); } } @@ -793,11 +509,13 @@ public abstract class AbstractLayer impl if (input == null) { if (backprop) { throw new IllegalStateException( - "Cannot perform backprop in layer " + getClass().getSimpleName() + "Cannot perform backprop in layer " + + getClass().getSimpleName() + ": layer input field is not set"); } else { throw new IllegalStateException( - "Cannot perform forward pass in layer " + getClass().getSimpleName() + "Cannot perform forward pass in layer " + + getClass().getSimpleName() + ": layer input field is not set"); } } @@ -810,14 +528,79 @@ public abstract class AbstractLayer impl @Override public LayerHelper getHelper() { - //Layers with helpers should override this method! + // Layers with helpers should override this method! return null; } @Override public boolean updaterDivideByMinibatch(String paramName) { - //Majority of params's gradients should be... Exception: batch norm mean/variance estimate + // Majority of params's gradients should be... Exception: batch norm mean/variance estimate return true; } + /** + * The AbstractLayer does not implement Params, ParamTable and GradientView. A RuntimeException + * will be triggered when calling this. + * + * @return + */ + @Override + public Map getParamTable() { + throw new RuntimeException("Not implemented"); + } + + /** + * * The AbstractLayer does not implement Params, ParamTable and GradientView. A RuntimeException + * * will be triggered when calling this. + * + * @param paramTable + */ + @Override + public void setParamTable(Map paramTable) { + throw new RuntimeException("Not implemented"); + } + + /** + * * The AbstractLayer does not implement Params, ParamTable and GradientView. A RuntimeException + * * will be triggered when calling this. + * + * @param isBackprop + * @return + */ + @Override + public Map getParamTable(boolean isBackprop) { + throw new RuntimeException("Not implemented"); + } + + /** + * * The AbstractLayer does not implement Params, ParamTable and GradientView. A RuntimeException + * * will be triggered when calling this. + * + * @return 1d parameter vector + */ + @Override + public INDArray getParams() { + // throw new RuntimeException("Not implemented"); + return null; + } + + /** + * Set the parameters for this model. 
This expects a linear ndarray which then be unpacked + * internally relative to the expected ordering of the model + * + * @param params the parameters for the model + */ + @Override + public void setParams(INDArray params) {} + + /** + * * The AbstractLayer does not implement Params, ParamTable and GradientView. A RuntimeException + * * will be triggered when calling this. + * + * @return 1D gradients view array + */ + @Override + public INDArray getGradientsViewArray() { + throw new RuntimeException("Not implemented"); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java index 7043275a0..48df25694 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java @@ -21,7 +21,7 @@ package org.deeplearning4j.nn.layers; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import java.util.Map; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -38,6 +38,7 @@ public class ActivationLayer extends AbstractLayer backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); INDArray temp = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, input, input.ordering()); - INDArray delta = layerConf().getActivationFn().backprop(temp, epsilon).getFirst(); //TODO handle activation function params + INDArray delta = getTypedLayerConfiguration().getActivationFn().backprop(temp, epsilon).getFirst(); //TODO handle activation function params if(delta == epsilon ){ //Edge case: identity activation + external errors -> no-op delta = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, delta); @@ -75,7 +76,7 @@ public class ActivationLayer extends AbstractLayer paramTable; + /** + * @param backpropOnly If true: return only parameters that are not exclusively used for layerwise + * pretraining + * @return Parameter table + */ + @Override + public Map getParamTable(boolean backpropOnly) { + return this.paramTable; + } + + /** + * @param map + */ + @Override + public void setParamTable(Map map) { + this.paramTable = map; + } @Override - public INDArray params() { + public INDArray getModelParams() { return null; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java index 68de26b7c..6363c77c5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java @@ -21,19 +21,16 @@ package org.deeplearning4j.nn.layers; import java.lang.reflect.Constructor; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import lombok.Getter; import lombok.NonNull; +import lombok.Setter; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; +import org.deeplearning4j.nn.api.ITrainableLayer; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import 
org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -58,14 +55,31 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.learning.regularization.Regularization; -/** - * A layer with parameters - * - * @author Adam Gibson - */ +/** A layer with parameters */ @Slf4j -public abstract class BaseLayer - extends AbstractLayer { +public abstract class BaseLayer + extends AbstractLayer implements ITrainableLayer { + + protected double score = 0.0; + protected ConvexOptimizer optimizer; + protected Gradient gradient; + protected Solver solver; + protected Map weightNoiseParams = new HashMap<>(); + protected INDArray paramsFlattened; + protected INDArray gradientsFlattened; + + @Getter @Setter protected Map paramTable; + + @Getter protected transient Map gradientViews; + + /** + * we put this as a virtual function to access the models paramTable. @Getter @Setter private + * INDArray params; + */ + public BaseLayer(LayerConfiguration conf, DataType dataType) { + + super(conf, dataType); + } /** * This method executes evaluation of the model against given iterator and evaluation @@ -91,31 +105,9 @@ public abstract class BaseLayer weightNoiseParams = new HashMap<>(); - protected INDArray paramsFlattened; - protected INDArray gradientsFlattened; - /** - * Full table of parameters - */ - protected Map paramsTable; - @Getter protected transient Map gradientViews; - - public BaseLayer(LayerConfiguration conf, DataType dataType) { - super(conf, dataType); - } - - /** * and others even use \epsilon (epsilon) * http://web.cs.swarthmore.edu/~meeden/cs81/s10/BackPropDeriv.pdf * - * @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where - * C is cost function a=sigma(z) is activation. + * @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where C is + * cost function a=sigma(z) is activation. 
* @param workspaceMgr Workspace manager * @return */ @Override - public Pair backpropGradient(INDArray epsilon, - LayerWorkspaceMgr workspaceMgr) { + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); - //If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent) + // If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent) Pair zAndPreNorm = preOutputWithPreNorm(true, true, workspaceMgr); - INDArray z = zAndPreNorm.getFirst(); //Note: using preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag + INDArray z = + zAndPreNorm.getFirst(); // Note: using preOutput(INDArray) can't be used as this does a + // setInput(input) and resets the 'appliedDropout' flag INDArray preNorm = zAndPreNorm.getSecond(); - INDArray delta = layerConf().getActivationFn().backprop(z, epsilon) - .getFirst(); //TODO handle activation function params + INDArray delta = + getTypedLayerConfiguration() + .getActivationFn() + .backprop(z, epsilon) + .getFirst(); // TODO handle activation function params if (maskArray != null) { applyMask(delta); @@ -317,29 +264,39 @@ public abstract class BaseLayer(ret, epsilonNext); } - public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) { - if (this.input == null) { + if (getInput() == null) { log.warn("There is no input for this layer '{}'", layerConfiguration); return; } @@ -358,18 +314,15 @@ public abstract class BaseLayer parameterList = layerConfiguration.getVariables(); //netWideVariables(); + if (params == null) { + log.warn( + "setParams(INDArray params, char order): params is null. Skipping setParams in Layer {}[{}] at index {}", + getLayerConfiguration().getLayerName(), + getClass().getSimpleName(), + getIndex()); + return; + } + List parameterList = layerConfiguration.getVariables(); // netWideVariables(); int length = 0; - for (String s : parameterList) { - length += getParam(s).length(); - } - if (params.length() != length) { - throw new IllegalArgumentException("Unable to set parameters: must be of length " + length - + ", got params of length " + params.length() + " - " + layerId()); - } + for (String s : parameterList) { + length += getParam(s).length(); + } + if (params.length() != length) { + throw new IllegalArgumentException( + "Unable to set parameters: must be of length " + + length + + ", got params of length " + + params.length() + + " - " + + layerId()); + } int idx = 0; Set paramKeySet = this.getParamTable().keySet(); for (String s : paramKeySet) { INDArray param = getParam(s); - INDArray get = params.get(NDArrayIndex.point(0), - NDArrayIndex.interval(idx, idx + param.length())); - if (param.length() != get.length()) { - throw new IllegalStateException( - "Parameter " + s + " should have been of length " + param.length() - + " but was " + get.length() + " - " + layerId()); - } - param.assign(get.reshape(order, - param.shape())); //Use assign due to backprop params being a view of a larger array + INDArray get = + params.get(NDArrayIndex.point(0), NDArrayIndex.interval(idx, idx + param.length())); + if (param.length() != get.length()) { + throw new IllegalStateException( + "Parameter " + + s + + " should have been of length " + + param.length() + + " but was " + + get.length() + + " - " + + layerId()); + } + param.assign( + get.reshape( + order, + param.shape())); // Use assign due to backprop params being a view of a larger array idx += param.length(); } } @Override public void 
setParamsViewArray(INDArray params) { - if (this.paramsTable != null && params.length() != numParams()) { - throw new IllegalArgumentException("Invalid input: expect params of length " + numParams() - + ", got params of length " + params.length() + " - " + layerId()); - } - + if (this.getParamTable() != null && params.length() != numParams()) { + throw new IllegalArgumentException( + "Invalid input: expect params of length " + + numParams() + + ", got params of length " + + params.length() + + " - " + + layerId()); + } this.paramsFlattened = params; } + @Override + public Map getParamTable(boolean isBackprop) { + return paramTable; + } + @Override public INDArray getGradientsViewArray() { return gradientsFlattened; @@ -450,15 +431,19 @@ public abstract class BaseLayer 0 && weightNoiseParams.containsKey(param)) { - //Re-use these weights for both forward pass and backprop - don't want to use 2 different params here - //These should be cleared during backprop + // Re-use these weights for both forward pass and backprop - don't want to use 2 different + // params here + // These should be cleared during backprop return weightNoiseParams.get(param); } else { try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - p = layerConf().getWeightNoise() - .getParameter(this, param, getIterationCount(), getEpochCount(), training, - workspaceMgr); + p = + lconf + .getWeightNoise() + .getParameter( + this, param, getIterationCount(), getEpochCount(), training, workspaceMgr); } } if (training) { - //Store for re-use in backprop + // Store for re-use in backprop weightNoiseParams.put(param, p); } } else { @@ -502,34 +491,45 @@ public abstract class BaseLayer preOutputWithPreNorm(boolean training, boolean forBackprop, - LayerWorkspaceMgr workspaceMgr) { + protected Pair preOutputWithPreNorm( + boolean training, boolean forBackprop, @NonNull LayerWorkspaceMgr workspaceMgr) { assertInputSet(forBackprop); applyDropOutIfNecessary(training, workspaceMgr); INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr); INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr); INDArray g = (hasLayerNorm() ? getParam(DefaultParamInitializer.GAIN_KEY) : null); - INDArray input = this.input.castTo(dataType); + INDArray input = getInput().castTo(dataType); - //Input validation: + // Input validation: if (input.rank() != 2 || input.columns() != W.rows()) { if (input.rank() != 2) { throw new DL4JInvalidInputException( "Input that is not a matrix; expected matrix (rank 2), got rank " - + input.rank() + " array with shape " + Arrays.toString(input.shape()) - + ". Missing preprocessor or wrong input type? " + layerId()); + + input.rank() + + " array with shape " + + Arrays.toString(input.shape()) + + ". Missing preprocessor or wrong input type? " + + layerId()); } throw new DL4JInvalidInputException( - "Input size (" + input.columns() + " columns; shape = " + Arrays.toString(input.shape()) + "Input size (" + + input.columns() + + " columns; shape = " + + Arrays.toString(input.shape()) + ") is invalid: does not match layer input size (layer # inputs = " - + W.size(0) + ") " + layerId()); + + W.size(0) + + ") " + + layerId()); } - INDArray ret = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, W.dataType(), - input.size(0), W.size(1)); - input.castTo(ret.dataType()).mmuli(W, - ret); //TODO Can we avoid this cast? 
(It sohuld be a no op if not required, however) + INDArray ret = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATIONS, W.dataType(), input.size(0), W.size(1)); + input + .castTo(ret.dataType()) + .mmuli( + W, ret); // TODO Can we avoid this cast? (It sohuld be a no op if not required, however) INDArray preNorm = ret; if (hasLayerNorm()) { @@ -550,8 +550,8 @@ public abstract class BaseLayer e : paramsTable.entrySet()) { - List l = layerConf().getRegularizationByParam(e.getKey()); + for (Map.Entry e : getParamTable().entrySet()) { + List l = getTypedLayerConfiguration().getRegularizationByParam(e.getKey()); if (l == null || l.isEmpty()) { continue; } @@ -582,7 +589,7 @@ public abstract class BaseLayer linkedTable = new LinkedHashMap<>(); - for (Map.Entry entry : paramsTable.entrySet()) { + for (Map.Entry entry : getParamTable().entrySet()) { linkedTable.put(entry.getKey(), entry.getValue().dup()); } layer.setParamTable(linkedTable); @@ -591,10 +598,8 @@ public abstract class BaseLayer gradientAndScore() { - return new Pair<>(gradient(), score()); + return new Pair<>(gradient(), getScore()); } @Override @@ -167,10 +166,10 @@ public abstract class BaseOutputLayer getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) { - ILossFunction lossFunction = layerConf().getLossFn(); + ILossFunction lossFunction = getTypedLayerConfiguration().getLossFn(); INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM); //INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFunction(), maskArray); - INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray); + INDArray delta = lossFunction.computeGradient(labels2d, preOut, getTypedLayerConfiguration().getActivationFn(), maskArray); Gradient gradient = new DefaultGradient(); @@ -350,6 +349,6 @@ public abstract class BaseOutputLayer(zeroGradient, underlying.score()); + return new Pair<>(zeroGradient, underlying.getScore()); } @Override @@ -199,9 +199,9 @@ public class FrozenLayer extends BaseWrapperLayer { } @Override - public TrainingConfig getConfig(){ + public ITraininableLayerConfiguration getTrainingConfig(){ if (config == null) { - config = new DummyConfig(getUnderlying().getConfig().getLayerName()); + config = new DummyConfig(getUnderlying().getTrainingConfig().getLayerName()); } return config; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java index 425ec454f..9cf762798 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java @@ -42,7 +42,7 @@ public class FrozenLayerWithBackprop extends BaseWrapperLayer { public FrozenLayerWithBackprop(final Layer insideLayer) { super(insideLayer); - this.zeroGradient = new DefaultGradient(insideLayer.params()); + this.zeroGradient = new DefaultGradient(insideLayer.getParams()); } protected String layerId() { @@ -58,7 +58,7 @@ public class FrozenLayerWithBackprop extends BaseWrapperLayer { @Override public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { INDArray backpropEpsilon = underlying.backpropGradient(epsilon, workspaceMgr).getSecond(); - //backprop might have already changed the gradient view (like BaseLayer and BaseOutputLayer do) + //backprop might have already 
changed the gradient view (like BaseLayerConfiguration and BaseOutputLayer do) //so we want to put it back to zeroes INDArray gradientView = underlying.getGradientsViewArray(); if(gradientView != null){ @@ -72,12 +72,6 @@ public class FrozenLayerWithBackprop extends BaseWrapperLayer { return underlying.activate(false, workspaceMgr); } - @Override - public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) { - logTestMode(training); - return underlying.activate(input, false, workspaceMgr); - } - @Override public void fit() { if (!logFit) { @@ -112,7 +106,7 @@ public class FrozenLayerWithBackprop extends BaseWrapperLayer { "Gradients for the frozen layer are not set and will therefore will not be updated.Warning will be issued only once per instance"); logGradient = true; } - underlying.score(); + underlying.getScore(); //no op } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java index e13a06219..43bbc69d8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.layers; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.api.layers.IOutputLayer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -33,7 +32,6 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.util.FeatureUtil; @@ -72,10 +70,10 @@ public class LossLayer extends BaseLayer gradientAndScore() { - return new Pair<>(gradient(), score()); + return new Pair<>(gradient(), getScore()); } @Override @@ -135,8 +133,8 @@ public class LossLayer extends BaseLayer getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) { // delta calculation - ILossFunction lossFunction = layerConf().getLossFn(); - INDArray delta = lossFunction.computeGradient(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray); + ILossFunction lossFunction = getTypedLayerConfiguration().getLossFn(); + INDArray delta = lossFunction.computeGradient(getLabels2d(), preOut, getTypedLayerConfiguration().getActivationFn(), maskArray); // grab the empty gradient Gradient gradient = new DefaultGradient(); @@ -172,7 +170,7 @@ public class LossLayer extends BaseLayer fwd = preOutput(false,true,workspaceMgr); - IActivation afn = layerConf().getActivationFn(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); INDArray delta = afn.backprop(fwd.getFirst(), epsilon).getFirst(); //TODO handle activation function params - org.deeplearning4j.nn.conf.layers.Convolution1DLayer c = layerConf(); + org.deeplearning4j.nn.conf.layers.Convolution1DLayer c = getTypedLayerConfiguration(); Conv1DConfig conf = Conv1DConfig.builder() .k(c.getKernelSize()[0]) .s(c.getStride()[0]) @@ -86,11 +85,11 @@ public class Convolution1DLayer extends ConvolutionLayer { getRnnDataFormat()); INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, 
input.dataType(), input.shape()); INDArray input = this.input.castTo(dataType); - if(layerConf().getRnnDataFormat() == RNNFormat.NWC) { + if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { input = input.permute(0,2,1); //NHWC to NCHW } - if(layerConf().hasBias()) { + if(getTypedLayerConfiguration().hasBias()) { INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); b = b.reshape(b.length()); inputArrs = new INDArray[]{input, w, b, delta}; @@ -106,7 +105,7 @@ public class Convolution1DLayer extends ConvolutionLayer { Nd4j.exec(op); Gradient retGradient = new DefaultGradient(); - if(layerConf().hasBias()) { + if(getTypedLayerConfiguration().hasBias()) { retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, gradientViews.get(ConvolutionParamInitializer.BIAS_KEY)); } retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), 'c'); @@ -130,11 +129,11 @@ public class Convolution1DLayer extends ConvolutionLayer { assertInputSet(false); INDArray input = this.input.castTo(dataType); - if(layerConf().getRnnDataFormat() == RNNFormat.NWC) { + if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { input = input.permute(0,2,1); //NHWC to NCHW } - org.deeplearning4j.nn.conf.layers.Convolution1DLayer c = layerConf(); + org.deeplearning4j.nn.conf.layers.Convolution1DLayer c = getTypedLayerConfiguration(); Conv1DConfig conf = Conv1DConfig.builder() .k(c.getKernelSize()[0]) .s(c.getStride()[0]) @@ -151,7 +150,7 @@ public class Convolution1DLayer extends ConvolutionLayer { INDArray[] inputs; - if(layerConf().hasBias()) { + if(getTypedLayerConfiguration().hasBias()) { INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); b = b.reshape(b.length()); inputs = new INDArray[]{input, w, b}; @@ -193,18 +192,18 @@ public class Convolution1DLayer extends ConvolutionLayer { @Override public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { - INDArray reduced = ConvolutionUtils.cnn1dMaskReduction(maskArray, layerConf().getKernelSize()[0], - layerConf().getStride()[0], layerConf().getPadding()[0], layerConf().getDilation()[0], - layerConf().getConvolutionMode()); + INDArray reduced = ConvolutionUtils.cnn1dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize()[0], + getTypedLayerConfiguration().getStride()[0], getTypedLayerConfiguration().getPadding()[0], getTypedLayerConfiguration().getDilation()[0], + getTypedLayerConfiguration().getConvolutionMode()); return new Pair<>(reduced, currentMaskState); } @Override - public org.deeplearning4j.nn.conf.layers.Convolution1DLayer layerConf() { + public org.deeplearning4j.nn.conf.layers.Convolution1DLayer getTypedLayerConfiguration() { return (org.deeplearning4j.nn.conf.layers.Convolution1DLayer)layerConfiguration; } private RNNFormat getRnnDataFormat(){ - return layerConf().getRnnDataFormat(); + return getTypedLayerConfiguration().getRnnDataFormat(); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution3DLayer.java index 0edb2ed3b..184c46723 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution3DLayer.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.layers.convolution; import 
org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Convolution3D; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -67,7 +66,7 @@ public class Convolution3DLayer extends ConvolutionLayer { INDArray input = this.input.castTo(dataType); INDArray weights = getParamWithNoise(Convolution3DParamInitializer.WEIGHT_KEY, true, workspaceMgr); - Convolution3D layerConfig = (Convolution3D) layerConf(); + Convolution3D layerConfig = (Convolution3D) getTypedLayerConfiguration(); boolean isNCDHW = layerConfig.getDataFormat() == Convolution3D.DataFormat.NCDHW; @@ -76,7 +75,7 @@ public class Convolution3DLayer extends ConvolutionLayer { int inH = (int) (isNCDHW ? input.size(3) : input.size(2)); int inW = (int) (isNCDHW ? input.size(4) : input.size(3)); - int outEpsChannels = (int) layerConf().getNIn(); + int outEpsChannels = (int) getTypedLayerConfiguration().getNIn(); int[] dilation = layerConfig.getDilation(); int[] kernel = layerConfig.getKernelSize(); @@ -165,7 +164,7 @@ public class Convolution3DLayer extends ConvolutionLayer { protected Pair preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { - Convolution3D layerConfig = (Convolution3D) layerConf(); + Convolution3D layerConfig = (Convolution3D) getTypedLayerConfiguration(); ConvolutionMode mode = layerConfig.getConvolutionMode(); boolean isNCDHW = layerConfig.getDataFormat() == Convolution3D.DataFormat.NCDHW; @@ -194,8 +193,8 @@ public class Convolution3DLayer extends ConvolutionLayer { int inH = (int) (isNCDHW ? input.size(3) : input.size(2)); int inW = (int) (isNCDHW ? 
input.size(4) : input.size(3)); - int outWeightChannels = (int)layerConf().getNOut(); - int inWeightChannels = (int)layerConf().getNIn(); + int outWeightChannels = (int) getTypedLayerConfiguration().getNOut(); + int inWeightChannels = (int) getTypedLayerConfiguration().getNIn(); if (inputChannels != inWeightChannels) { String layerName = layerConfiguration.getLayerName(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java index ffd36652a..c1a94338a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java @@ -28,7 +28,6 @@ import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -82,7 +81,7 @@ public class ConvolutionLayer extends BaseLayer p = preOutput4d(true, true, workspaceMgr); INDArray z = p.getFirst(); - CNN2DFormat f = layerConf().getCnn2dDataFormat(); + CNN2DFormat f = getTypedLayerConfiguration().getCnn2dDataFormat(); if(f != CNN2DFormat.NCHW){ z = z.permute(0,3,1,2); //NHWC to NCHW } delta = afn.backprop(z, epsilon).getFirst(); //TODO handle activation function params - if (helper != null && (helperCountFail == 0 || !layerConf().isCudnnAllowFallback())) { + if (helper != null && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { INDArray helperDelta = delta; - if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) + if(getTypedLayerConfiguration().getCnn2dDataFormat() == CNN2DFormat.NHWC) helperDelta = delta.permute(0,2,3,1); //NCHW to NHWC if(!hasBias() && !(helper instanceof MKLDNNConvHelper)){ //MKL-DNN supports no bias, CuDNN doesn't if(dummyBiasGrad == null){ try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - dummyBiasGrad = Nd4j.create(1, layerConf().getNOut()); + dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); } } biasGradView = dummyBiasGrad; @@ -177,8 +176,8 @@ public class ConvolutionLayer extends BaseLayer(preOutput, null); } @@ -413,7 +412,7 @@ public class ConvolutionLayer extends BaseLayer addiRowVector - if(layerConf().hasBias()){ + if(getTypedLayerConfiguration().hasBias()){ z.addiRowVector(bias); } @@ -499,7 +498,7 @@ public class ConvolutionLayer extends BaseLayer(maskArray, currentMaskState); } - INDArray outMask = ConvolutionUtils.cnn2dMaskReduction(maskArray, layerConf().getKernelSize(), layerConf().getStride(), - layerConf().getPadding(), layerConf().getDilation(), layerConf().getConvolutionMode()); + INDArray outMask = ConvolutionUtils.cnn2dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize(), getTypedLayerConfiguration().getStride(), + getTypedLayerConfiguration().getPadding(), getTypedLayerConfiguration().getDilation(), getTypedLayerConfiguration().getConvolutionMode()); return new Pair<>(outMask, currentMaskState); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping1DLayer.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping1DLayer.java index ac29715e0..94f752a6e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping1DLayer.java @@ -20,14 +20,17 @@ package org.deeplearning4j.nn.layers.convolution; +import java.util.Map; import lombok.val; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; + import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.layers.BaseLayer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.api.buffer.DataType; @@ -98,4 +101,5 @@ public class Cropping1DLayer extends AbstractLayer { } } } + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java index d72d2f3eb..83f17f216 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.layers.convolution; import lombok.val; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -92,7 +91,7 @@ public class Cropping2DLayer extends AbstractLayer p = preOutput4d(true, true, workspaceMgr); delta = afn.backprop(p.getFirst(), epsilon).getFirst(); @@ -119,7 +118,7 @@ public class Deconvolution2DLayer extends ConvolutionLayer { INDArray[] opInputs; INDArray[] opOutputs; - if(layerConf().hasBias()){ + if(getTypedLayerConfiguration().hasBias()){ INDArray bias = getParamWithNoise(DeconvolutionParamInitializer.BIAS_KEY, true, workspaceMgr); opInputs = new INDArray[]{input, weights, bias, delta}; opOutputs = new INDArray[]{outEps, weightGradViewOp, biasGradView}; @@ -137,7 +136,7 @@ public class Deconvolution2DLayer extends ConvolutionLayer { Gradient retGradient = new DefaultGradient(); - if(layerConf().hasBias()){ + if(getTypedLayerConfiguration().hasBias()){ retGradient.setGradientFor(DeconvolutionParamInitializer.BIAS_KEY, biasGradView); } retGradient.setGradientFor(DeconvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c'); @@ -167,7 +166,7 @@ public class Deconvolution2DLayer extends ConvolutionLayer { + " " + layerId()); } - CNN2DFormat format = layerConf().getCnn2dDataFormat(); + CNN2DFormat format = getTypedLayerConfiguration().getCnn2dDataFormat(); boolean nchw = format == CNN2DFormat.NCHW; int cDim = nchw ? 1 : 3; int hDim = nchw ? 
2 : 1; @@ -199,9 +198,9 @@ public class Deconvolution2DLayer extends ConvolutionLayer { int kH = (int) weights.size(2); int kW = (int) weights.size(3); - int[] dilation = layerConf().getDilation(); - int[] kernel = layerConf().getKernelSize(); - int[] strides = layerConf().getStride(); + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); int[] pad; int[] outSize; @@ -210,7 +209,7 @@ public class Deconvolution2DLayer extends ConvolutionLayer { pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {(int) input.size(hDim), (int) input.size(wDim)}, kernel, strides, dilation ); } else { - pad = layerConf().getPadding(); + pad = getTypedLayerConfiguration().getPadding(); outSize = ConvolutionUtils.getDeconvolutionOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); //Also performs validation } @@ -235,7 +234,7 @@ public class Deconvolution2DLayer extends ConvolutionLayer { weights = weights.permute(2, 3, 1, 0); INDArray[] opInputs; - if (layerConf().hasBias()) { + if (getTypedLayerConfiguration().hasBias()) { opInputs = new INDArray[]{input, weights, bias}; } else { opInputs = new INDArray[]{input, weights}; @@ -262,10 +261,10 @@ public class Deconvolution2DLayer extends ConvolutionLayer { INDArray z = preOutput(training, false, workspaceMgr).getFirst(); - IActivation afn = layerConf().getActivationFn(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); if (helper != null && Shape.strideDescendingCAscendingF(z)) { - INDArray ret = helper.activate(z, layerConf().getActivationFn(), training); + INDArray ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training); if (ret != null) { return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution3DLayer.java index 302522ed3..a14414d03 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution3DLayer.java @@ -24,7 +24,6 @@ import lombok.val; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Convolution3D; import org.deeplearning4j.nn.conf.layers.Deconvolution3D; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -64,20 +63,20 @@ public class Deconvolution3DLayer extends BaseLayer { INDArray weights = getParamWithNoise(DeconvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr); - Convolution3D.DataFormat df = layerConf().getDataFormat(); - ConvolutionMode cm = layerConf().getConvolutionMode(); + Convolution3D.DataFormat df = getTypedLayerConfiguration().getDataFormat(); + ConvolutionMode cm = getTypedLayerConfiguration().getConvolutionMode(); - int[] dilation = layerConf().getDilation(); - int[] kernel = layerConf().getKernelSize(); - int[] strides = layerConf().getStride(); - int[] pad = layerConf().getPadding(); + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); + int[] pad = 
getTypedLayerConfiguration().getPadding(); INDArray biasGradView = gradientViews.get(DeconvolutionParamInitializer.BIAS_KEY); INDArray weightGradView = gradientViews.get(DeconvolutionParamInitializer.WEIGHT_KEY); INDArray outEps = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, weights.dataType(), input.shape(), 'c'); - Integer sameMode = (layerConf().getConvolutionMode() == ConvolutionMode.Same) ? 1 : 0; + Integer sameMode = (getTypedLayerConfiguration().getConvolutionMode() == ConvolutionMode.Same) ? 1 : 0; int[] args = new int[] { kernel[0], kernel[1], kernel[2], strides[0], strides[1], strides[2], @@ -86,13 +85,13 @@ public class Deconvolution3DLayer extends BaseLayer { }; INDArray delta; - IActivation afn = layerConf().getActivationFn(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); INDArray preOutput = preOutput(true, workspaceMgr); delta = afn.backprop(preOutput, epsilon).getFirst(); INDArray[] opInputs; INDArray[] opOutputs; - if(layerConf().hasBias()){ + if(getTypedLayerConfiguration().hasBias()){ INDArray bias = getParamWithNoise(DeconvolutionParamInitializer.BIAS_KEY, true, workspaceMgr); opInputs = new INDArray[]{input, weights, bias, delta}; opOutputs = new INDArray[]{outEps, weightGradView, biasGradView}; @@ -110,7 +109,7 @@ public class Deconvolution3DLayer extends BaseLayer { Gradient retGradient = new DefaultGradient(); - if(layerConf().hasBias()){ + if(getTypedLayerConfiguration().hasBias()){ retGradient.setGradientFor(DeconvolutionParamInitializer.BIAS_KEY, biasGradView); } retGradient.setGradientFor(DeconvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c'); @@ -132,34 +131,34 @@ public class Deconvolution3DLayer extends BaseLayer { " [minibatchSize, inputHeight, inputWidth, inputDepth, channels]. " + layerId()); } - Convolution3D.DataFormat df = layerConf().getDataFormat(); - boolean ncdhw = layerConf().getDataFormat() == Convolution3D.DataFormat.NCDHW; + Convolution3D.DataFormat df = getTypedLayerConfiguration().getDataFormat(); + boolean ncdhw = getTypedLayerConfiguration().getDataFormat() == Convolution3D.DataFormat.NCDHW; int chDim = ncdhw ? 1 : 4; - if (input.size(chDim) != layerConf().getNIn() ) { + if (input.size(chDim) != getTypedLayerConfiguration().getNIn() ) { String layerName = getLayerConfiguration().getLayerName(); if (layerName == null) layerName = "(not named)"; throw new DL4JInvalidInputException("Cannot do forward pass in Deconvolution3D layer (layer name = " + layerName + ", layer index = " + index + "): input array channels does not match CNN layer configuration" + " (data input channels = " + input.size(chDim) + ", " + (ncdhw ? "[minibatch,channels,height,width,depth]=" : "[minibatch,height,width,depth,channels]=") - + Arrays.toString(input.shape()) + "; expected" + " input channels = " + layerConf().getNIn() + ") " + + Arrays.toString(input.shape()) + "; expected" + " input channels = " + getTypedLayerConfiguration().getNIn() + ") " + layerId()); } - int[] dilation = layerConf().getDilation(); - int[] kernel = layerConf().getKernelSize(); - int[] strides = layerConf().getStride(); + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); int[] pad; - ConvolutionMode cm = layerConf().getConvolutionMode(); + ConvolutionMode cm = getTypedLayerConfiguration().getConvolutionMode(); long[] outSize; int[] inSize = df == Convolution3D.DataFormat.NCDHW ? 
new int[]{(int)input.size(2), (int)input.size(3), (int)input.size(4)} : new int[]{(int)input.size(1), (int)input.size(2), (int)input.size(3)}; if (cm == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getDeconvolution3DOutputSize(input, kernel, strides, null, dilation, cm, layerConf().getDataFormat()); //Also performs validation + outSize = ConvolutionUtils.getDeconvolution3DOutputSize(input, kernel, strides, null, dilation, cm, getTypedLayerConfiguration().getDataFormat()); //Also performs validation pad = ConvolutionUtils.getSameModeTopLeftPadding(ArrayUtil.toInts(outSize), inSize, kernel, strides, dilation ); } else { - pad = layerConf().getPadding(); - outSize = ConvolutionUtils.getDeconvolution3DOutputSize(input, kernel, strides, pad, dilation, cm, layerConf().getDataFormat()); //Also performs validation + pad = getTypedLayerConfiguration().getPadding(); + outSize = ConvolutionUtils.getDeconvolution3DOutputSize(input, kernel, strides, pad, dilation, cm, getTypedLayerConfiguration().getDataFormat()); //Also performs validation } long outH = outSize[0]; @@ -168,7 +167,7 @@ public class Deconvolution3DLayer extends BaseLayer { val miniBatch = input.size(0); - long[] outShape = df == Convolution3D.DataFormat.NCDHW ? new long[]{miniBatch, layerConf().getNOut(), outH, outW, outD} : new long[]{miniBatch, outH, outW, outD, layerConf().getNOut()}; + long[] outShape = df == Convolution3D.DataFormat.NCDHW ? new long[]{miniBatch, getTypedLayerConfiguration().getNOut(), outH, outW, outD} : new long[]{miniBatch, outH, outW, outD, getTypedLayerConfiguration().getNOut()}; INDArray output = workspaceMgr.create(ArrayType.ACTIVATIONS, input.dataType(), outShape, 'c'); int sameMode = (cm == ConvolutionMode.Same) ? 1 : 0; @@ -180,7 +179,7 @@ public class Deconvolution3DLayer extends BaseLayer { }; INDArray[] opInputs; - if (layerConf().hasBias()) { + if (getTypedLayerConfiguration().hasBias()) { opInputs = new INDArray[]{input, weights, bias}; } else { opInputs = new INDArray[]{input, weights}; @@ -207,7 +206,7 @@ public class Deconvolution3DLayer extends BaseLayer { INDArray z = preOutput(training, workspaceMgr); - IActivation afn = layerConf().getActivationFn(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); INDArray activation = afn.getActivation(z, training); return activation; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java index 888875129..2b39f70d2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java @@ -25,7 +25,6 @@ import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -60,12 +59,12 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { @Override public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); - CNN2DFormat format = layerConf().getCnn2dDataFormat(); + 
CNN2DFormat format = getTypedLayerConfiguration().getCnn2dDataFormat(); boolean nchw = format == CNN2DFormat.NCHW; if (input.rank() != 4) { throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to Convolution layer with shape " + Arrays.toString(input.shape()) - + ". Expected rank 4 array with shape " + layerConf().getCnn2dDataFormat().dimensionNames() + ". " + + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getCnn2dDataFormat().dimensionNames() + ". " + layerId()); } INDArray bias; @@ -82,16 +81,16 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { int kH = (int) depthWiseWeights.size(0); int kW = (int) depthWiseWeights.size(1); - int[] dilation = layerConf().getDilation(); - int[] kernel = layerConf().getKernelSize(); - int[] strides = layerConf().getStride(); + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); int[] pad; if (convolutionMode == ConvolutionMode.Same) { int[] outSize = ConvolutionUtils.getOutputSize( input, kernel, strides, null, convolutionMode, dilation, format); pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[]{inH, inW}, kernel, strides, dilation); } else { - pad = layerConf().getPadding(); + pad = getTypedLayerConfiguration().getPadding(); ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); } @@ -110,13 +109,13 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { }; INDArray delta; - IActivation afn = layerConf().getActivationFn(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); Pair p = preOutput4d(true, true, workspaceMgr); delta = afn.backprop(p.getFirst(), epsilon).getFirst(); INDArray[] inputs; INDArray[] outputs; - if (layerConf().hasBias()) { + if (getTypedLayerConfiguration().hasBias()) { bias = getParamWithNoise(DepthwiseConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr); inputs = new INDArray[]{input, depthWiseWeights, bias, delta}; outputs = new INDArray[]{outEpsilon, weightGradView, biasGradView}; @@ -134,7 +133,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { Nd4j.getExecutioner().exec(op); Gradient retGradient = new DefaultGradient(); - if (layerConf().hasBias()) { + if (getTypedLayerConfiguration().hasBias()) { retGradient.setGradientFor(DepthwiseConvolutionParamInitializer.BIAS_KEY, biasGradView); } retGradient.setGradientFor(DepthwiseConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c'); @@ -159,7 +158,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to DepthwiseConvolution2D (layer name = " + layerName + ", layer index = " + index + ") with shape " + Arrays.toString(input.shape()) + ". " - + "Expected rank 4 array with shape " + layerConf().getCnn2dDataFormat().dimensionNames() + "." + + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getCnn2dDataFormat().dimensionNames() + "." + (input.rank() == 2 ? 
" (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" : "") + " " + layerId()); @@ -167,7 +166,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { INDArray input = this.input.castTo(dataType); //no-op if correct dtype - CNN2DFormat format = layerConf().getCnn2dDataFormat(); + CNN2DFormat format = getTypedLayerConfiguration().getCnn2dDataFormat(); boolean nchw = format == CNN2DFormat.NCHW; long inDepth = depthWiseWeights.size(2); @@ -197,9 +196,9 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { int kH = (int) depthWiseWeights.size(0); int kW = (int) depthWiseWeights.size(1); - int[] dilation = layerConf().getDilation(); - int[] kernel = layerConf().getKernelSize(); - int[] strides = layerConf().getStride(); + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); int[] pad; int[] outSize; @@ -212,7 +211,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { pad = ConvolutionUtils.getSameModeTopLeftPadding( outSize, new int[]{(int) input.size(nchw ? 2 : 1), (int) input.size(nchw ? 3 : 2)}, kernel, strides, dilation); } else { - pad = layerConf().getPadding(); + pad = getTypedLayerConfiguration().getPadding(); outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); } @@ -231,7 +230,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { }; INDArray[] inputs; - if (layerConf().hasBias()) { + if (getTypedLayerConfiguration().hasBias()) { inputs = new INDArray[]{input, depthWiseWeights, bias}; } else { inputs = new INDArray[]{input, depthWiseWeights}; @@ -260,7 +259,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { INDArray z = preOutput(training, false, workspaceMgr).getFirst(); //String afn = conf.getLayer().getActivationFunction(); - IActivation afn = layerConf().getActivationFn(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); INDArray activation = afn.getActivation(z, training); return activation; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java index d205017bf..dc660bfc8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java @@ -25,7 +25,6 @@ import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -64,7 +63,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { if (input.rank() != 4) { throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to SubsamplingLayer with shape " + Arrays.toString(input.shape()) - + ". Expected rank 4 array with shape " + layerConf().getCnn2dDataFormat().dimensionNames() + ". " + + ". 
Expected rank 4 array with shape " + getTypedLayerConfiguration().getCnn2dDataFormat().dimensionNames() + ". " + layerId()); } INDArray bias; @@ -75,7 +74,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { INDArray input = this.input.castTo(dataType); - CNN2DFormat format = layerConf().getCnn2dDataFormat(); + CNN2DFormat format = getTypedLayerConfiguration().getCnn2dDataFormat(); boolean nchw = format == CNN2DFormat.NCHW; long miniBatch = input.size(0); @@ -86,15 +85,15 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { int kH = (int) depthWiseWeights.size(2); int kW = (int) depthWiseWeights.size(3); - int[] dilation = layerConf().getDilation(); - int[] kernel = layerConf().getKernelSize(); - int[] strides = layerConf().getStride(); + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); int[] pad; if (convolutionMode == ConvolutionMode.Same) { int[] outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); } else { - pad = layerConf().getPadding(); + pad = getTypedLayerConfiguration().getPadding(); ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); //Also performs validation } @@ -114,7 +113,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { }; INDArray delta; - IActivation afn = layerConf().getActivationFn(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); Pair p = preOutput4d(true, true, workspaceMgr); delta = afn.backprop(p.getFirst(), epsilon).getFirst(); @@ -126,7 +125,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { INDArray opPointWiseWeightGradView = pointWiseWeightGradView.permute(2, 3, 1, 0); CustomOp op; - if(layerConf().hasBias()){ + if(getTypedLayerConfiguration().hasBias()){ bias = getParamWithNoise(SeparableConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr); op = DynamicCustomOp.builder("sconv2d_bp") @@ -146,7 +145,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { Nd4j.getExecutioner().exec(op); Gradient retGradient = new DefaultGradient(); - if(layerConf().hasBias()){ + if(getTypedLayerConfiguration().hasBias()){ retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView); } retGradient.setGradientFor(SeparableConvolutionParamInitializer.DEPTH_WISE_WEIGHT_KEY, depthWiseWeightGradView, 'c'); @@ -168,7 +167,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { getParamWithNoise(SeparableConvolutionParamInitializer.POINT_WISE_WEIGHT_KEY, training, workspaceMgr); INDArray input = this.input.castTo(dataType); - if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) { + if(getTypedLayerConfiguration().getCnn2dDataFormat() == CNN2DFormat.NHWC) { input = input.permute(0,3,1,2).dup(); } @@ -183,7 +182,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to SeparableConvolution2D (layer name = " + layerName + ", layer index = " + index + ") with shape " + Arrays.toString(input.shape()) + ". " - + "Expected rank 4 array with shape " + layerConf().getCnn2dDataFormat().dimensionNames() + "." 
+ + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getCnn2dDataFormat().dimensionNames() + "." + (input.rank() == 2 ? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" : "") @@ -200,7 +199,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { String s = "Cannot do forward pass in SeparableConvolution2D layer (layer name = " + layerName + ", layer index = " + index + "): input array channels does not match CNN layer configuration" - + " (data format = " + layerConf().getCnn2dDataFormat() + ", data input channels = " + input.size(1) + ", [minibatch,inputDepth,height,width]=" + + " (data format = " + getTypedLayerConfiguration().getCnn2dDataFormat() + ", data input channels = " + input.size(1) + ", [minibatch,inputDepth,height,width]=" + Arrays.toString(input.shape()) + "; expected" + " input channels = " + inDepth + ") " + layerId(); @@ -215,9 +214,9 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { int kH = (int) depthWiseWeights.size(2); int kW = (int) depthWiseWeights.size(3); - int[] dilation = layerConf().getDilation(); - int[] kernel = layerConf().getKernelSize(); - int[] strides = layerConf().getStride(); + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); int[] pad; int[] outSize; @@ -241,7 +240,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { strides, dilation); } else { - pad = layerConf().getPadding(); + pad = getTypedLayerConfiguration().getPadding(); outSize = ConvolutionUtils.getOutputSize( input, kernel, @@ -273,7 +272,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { pointWiseWeights = pointWiseWeights.permute(2, 3, 1, 0); INDArray[] opInputs; - if (layerConf().hasBias()) { + if (getTypedLayerConfiguration().hasBias()) { opInputs = new INDArray[]{input, depthWiseWeights, pointWiseWeights, bias}; } else { opInputs = new INDArray[]{input, depthWiseWeights, pointWiseWeights}; @@ -288,7 +287,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { .build(); Nd4j.getExecutioner().exec(op); - if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) { + if(getTypedLayerConfiguration().getCnn2dDataFormat() == CNN2DFormat.NHWC) { output = output.permute(0,2,3,1); //NCHW to NHWC } @@ -307,7 +306,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { INDArray z = preOutput(training, false, workspaceMgr).getFirst(); //String afn = conf.getLayer().getActivationFunction(); - IActivation afn = layerConf().getActivationFn(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); INDArray activation = afn.getActivation(z, training); return activation; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java index fb824dfa3..1e5c7b270 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.layers.convolution; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import 
org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -48,20 +47,20 @@ public class SpaceToBatch extends AbstractLayer feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { - INDArray reduced = ConvolutionUtils.cnn1dMaskReduction(maskArray, layerConf().getKernelSize()[0], - layerConf().getStride()[0], layerConf().getPadding()[0], layerConf().getDilation()[0], - layerConf().getConvolutionMode()); + INDArray reduced = ConvolutionUtils.cnn1dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize()[0], + getTypedLayerConfiguration().getStride()[0], getTypedLayerConfiguration().getPadding()[0], getTypedLayerConfiguration().getDilation()[0], + getTypedLayerConfiguration().getConvolutionMode()); return new Pair<>(reduced, currentMaskState); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling3DLayer.java index 01f1698f6..168d59357 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling3DLayer.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.layers.convolution.subsampling; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Convolution3D; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.PoolingType; @@ -69,7 +68,7 @@ public class Subsampling3DLayer extends AbstractLayer backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); - boolean isNCDHW = layerConf().getDataFormat() == Convolution3D.DataFormat.NCDHW; + boolean isNCDHW = getTypedLayerConfiguration().getDataFormat() == Convolution3D.DataFormat.NCDHW; long miniBatch = input.size(0); long inChannels = isNCDHW ? 
input.size(1) : input.size(4); @@ -77,9 +76,9 @@ public class Subsampling3DLayer extends AbstractLayer ret = null; try{ ret = helper.backpropGradient(input, epsilon, kernel, strides, pad, - layerConf().getPoolingType(), convolutionMode, dilation, dataFormat, workspaceMgr); + getTypedLayerConfiguration().getPoolingType(), convolutionMode, dilation, dataFormat, workspaceMgr); } catch (ND4JOpProfilerException e){ throw e; //NaN panic etc for debugging } catch (Exception e){ @@ -137,7 +136,7 @@ public class SubsamplingLayer extends AbstractLayer(maskArray, currentMaskState); } - INDArray outMask = ConvolutionUtils.cnn2dMaskReduction(maskArray, layerConf().getKernelSize(), layerConf().getStride(), - layerConf().getPadding(), layerConf().getDilation(), layerConf().getConvolutionMode()); + INDArray outMask = ConvolutionUtils.cnn2dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize(), getTypedLayerConfiguration().getStride(), + getTypedLayerConfiguration().getPadding(), getTypedLayerConfiguration().getDilation(), getTypedLayerConfiguration().getConvolutionMode()); return super.feedForwardMaskArray(outMask, currentMaskState, minibatchSize); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java index e6630ad48..ae5417fc8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.layers.convolution.upsampling; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.BaseUpsamplingLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -57,7 +56,7 @@ public class Upsampling1D extends Upsampling2D { public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); - int[] size = ((BaseUpsamplingLayer) layerConf()).getSize(); + int[] size = ((BaseUpsamplingLayer) getTypedLayerConfiguration()).getSize(); epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1); // we replicate the error term times "size" so that backprop works properly on it epsilon = epsilon.repeat(3, size[0]); @@ -95,7 +94,7 @@ public class Upsampling1D extends Upsampling2D { @Override protected int[] getSize(){ - return ((org.deeplearning4j.nn.conf.layers.Upsampling1D) getLayerConfiguration()).getSize(); + return getLayerConfiguration().getSize(); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java index bf0742870..cf9da710e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java @@ -24,7 +24,6 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; import 
org.deeplearning4j.nn.conf.CacheMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -87,12 +86,12 @@ public class Upsampling2D extends AbstractLayer backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); - boolean ncdhw = layerConf().getDataFormat() == org.deeplearning4j.nn.conf.layers.Convolution3D.DataFormat.NCDHW; + boolean ncdhw = getTypedLayerConfiguration().getDataFormat() == org.deeplearning4j.nn.conf.layers.Convolution3D.DataFormat.NCDHW; // Assumes NCDHW order long miniBatch = input.size(0); long inChannels, inD, inH, inW; @@ -110,7 +109,7 @@ public class Upsampling3D extends AbstractLayer { - long[] axes = layerConf().getSharedAxes(); + long[] axes = getTypedLayerConfiguration().getSharedAxes(); public PReLU(LayerConfiguration conf, DataType dataType) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java index b2264d5cb..5a65889f8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java @@ -20,7 +20,6 @@ package org.deeplearning4j.nn.layers.feedforward.autoencoder; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.layers.BasePretrainNetwork; import org.deeplearning4j.nn.params.PretrainParamInitializer; @@ -55,7 +54,7 @@ public class AutoEncoder extends BasePretrainNetwork 0 ? 
getCorruptedInput(input, corruptionLevel) : input; setInput(corruptedX, workspaceMgr); @@ -98,8 +97,8 @@ public class AutoEncoder extends BasePretrainNetwork backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { //If this layer is layer L, then epsilon for this layer is ((w^(L+1)*(delta^(L+1))^T))^T (or equivalent) INDArray z = preOutput(true, workspaceMgr); //Note: using preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag - INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params + INDArray delta = getTypedLayerConfiguration().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params if (maskArray != null) { applyMask(delta); @@ -69,7 +68,7 @@ public class ElementWiseMultiplicationLayer extends BaseLayer Integer.MAX_VALUE) throw new ND4JArraySizeException(); @@ -126,7 +125,7 @@ public class EmbeddingLayer extends BaseLayer backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); INDArray z = preOutput(true, workspaceMgr); - INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //Shape: [mb, vector, seqLength] + INDArray delta = getTypedLayerConfiguration().getActivationFn().backprop(z, epsilon).getFirst(); //Shape: [mb, vector, seqLength] - boolean ncw = layerConf().getOutputFormat() == RNNFormat.NCW; + boolean ncw = getTypedLayerConfiguration().getOutputFormat() == RNNFormat.NCW; if (maskArray != null) { if(ncw){ @@ -68,9 +67,9 @@ public class EmbeddingSequenceLayer extends BaseLayer [minibatch, nOut, seqLen] i.e., NWC -> NCW } return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, ret); @@ -176,7 +175,7 @@ public class EmbeddingSequenceLayer extends BaseLayer(Arrays.asList(listeners)); } @@ -618,7 +619,7 @@ public class BatchNormalization extends BaseLayer backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); - double k = layerConf().getK(); - double n = layerConf().getN(); - double alpha = layerConf().getAlpha(); - double beta = layerConf().getBeta(); + double k = getTypedLayerConfiguration().getK(); + double n = getTypedLayerConfiguration().getN(); + double alpha = getTypedLayerConfiguration().getAlpha(); + double beta = getTypedLayerConfiguration().getBeta(); int halfN = (int) n / 2; - if (helper != null && (helperCountFail == 0 || !layerConf().isCudnnAllowFallback())){ + if (helper != null && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())){ Pair ret = null; try { ret = helper.backpropGradient(input, epsilon, k, n, alpha, beta, workspaceMgr); @@ -120,7 +119,7 @@ public class LocalResponseNormalization //This is a memory exception - don't fallback to built-in implementation throw t; } - if(layerConf().isCudnnAllowFallback()){ + if(getTypedLayerConfiguration().isCudnnAllowFallback()){ helperCountFail++; log.warn("CuDNN LocalResponseNormalization backprop execution failed - falling back on built-in implementation",t); } else { @@ -132,7 +131,7 @@ public class LocalResponseNormalization } } - boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW; + boolean nchw = getTypedLayerConfiguration().getDataFormat() == CNN2DFormat.NCHW; int chDim = nchw ? 1 : 3; int hDim = nchw ? 2 : 1; int wDim = nchw ? 
3 : 2; @@ -185,13 +184,13 @@ public class LocalResponseNormalization private Triple activateHelper(boolean training, LayerWorkspaceMgr workspaceMgr, boolean forBackprop){ assertInputSet(false); - double k = layerConf().getK(); - double n = layerConf().getN(); - double alpha = layerConf().getAlpha(); - double beta = layerConf().getBeta(); + double k = getTypedLayerConfiguration().getK(); + double n = getTypedLayerConfiguration().getN(); + double alpha = getTypedLayerConfiguration().getAlpha(); + double beta = getTypedLayerConfiguration().getBeta(); int halfN = (int) n / 2; - if (helper != null && (helperCountFail == 0 || !layerConf().isCudnnAllowFallback())){ + if (helper != null && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())){ INDArray activations = null; try { activations = helper.activate(input, training, k, n, alpha, beta, workspaceMgr); @@ -203,7 +202,7 @@ public class LocalResponseNormalization throw t; } - if(layerConf().isCudnnAllowFallback()){ + if(getTypedLayerConfiguration().isCudnnAllowFallback()){ helperCountFail++; log.warn("CuDNN LocalResponseNormalization backprop execution failed - falling back on built-in implementation",t); } else { @@ -215,7 +214,7 @@ public class LocalResponseNormalization } } - boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW; + boolean nchw = getTypedLayerConfiguration().getDataFormat() == CNN2DFormat.NCHW; int chDim = nchw ? 1 : 3; val channel = input.size(chDim); @@ -287,13 +286,13 @@ public class LocalResponseNormalization } @Override - public INDArray params() { + public INDArray getModelParams() { return null; } @Override public INDArray getParam(String param) { - return params(); + return getModelParams(); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java index e5f0fbf1e..49a61f496 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java @@ -24,7 +24,6 @@ import lombok.*; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -86,19 +85,19 @@ public class Yolo2OutputLayer extends AbstractLayer Predicted WH in grid units (0 to 13 usually) INDArray predictedWHPreExp = input5.get(all(), all(), interval(2,4), all(), all()); INDArray predictedWH = Transforms.exp(predictedWHPreExp, true); - Broadcast.mul(predictedWH, layerConf().getBoundingBoxes().castTo(predictedWH.dataType()), predictedWH, 1, 2); //Box priors: [b, 2]; predictedWH: [mb, b, 2, h, w] + Broadcast.mul(predictedWH, getTypedLayerConfiguration().getBoundingBoxes().castTo(predictedWH.dataType()), predictedWH, 1, 2); //Box priors: [b, 2]; predictedWH: [mb, b, 2, h, w] //Apply sqrt to W/H in preparation for loss function INDArray predictedWHSqrt = Transforms.sqrt(predictedWH, true); @@ -236,11 +235,11 @@ public class Yolo2OutputLayer extends AbstractLayer gradientAndScore() { - return new Pair<>(gradient(), score()); + return new Pair<>(gradient(), this.getScore()); } @Override @@ -617,7 +616,7 @@ public class 
Yolo2OutputLayer extends AbstractLayer getPredictedObjects(INDArray networkOutput, double threshold){ - return YoloUtils.getPredictedObjects(layerConf().getBoundingBoxes(), networkOutput, threshold, 0.0); + return YoloUtils.getPredictedObjects(getTypedLayerConfiguration().getBoundingBoxes(), networkOutput, threshold, 0.0); } /** @@ -651,7 +650,7 @@ public class Yolo2OutputLayer extends AbstractLayer getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) { - ILossFunction lossFunction = layerConf().getLossFn(); + ILossFunction lossFunction = getTypedLayerConfiguration().getLossFn(); INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM); - INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray); + INDArray delta = lossFunction.computeGradient(labels2d, preOut, getTypedLayerConfiguration().getActivationFn(), maskArray); org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer conf = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) getLayerConfiguration(); @@ -165,20 +164,20 @@ public class OCNNOutputLayer extends BaseOutputLayer sigmoid derivative - INDArray firstVertDerivV = layerConf().getActivationFn() + INDArray firstVertDerivV = getTypedLayerConfiguration().getActivationFn() .backprop(xTimesV.dup(),Nd4j.ones(input.dataType(), xTimesV.shape())) .getFirst().muliRowVector(getParam(W_KEY).neg()); firstVertDerivV = firstVertDerivV.muliColumnVector(delta) - .reshape('f',input.size(0),1,layerConf().getHiddenSize()); + .reshape('f',input.size(0),1, getTypedLayerConfiguration().getHiddenSize()); INDArray secondTermDerivV = input.reshape('f', input.size(0),getParam(V_KEY).size(0),1); @@ -251,7 +250,7 @@ public class OCNNOutputLayer extends BaseOutputLayer backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); - if (!layerConf().isCollapseDimensions() && epsilon.rank() != 2) { + if (!getTypedLayerConfiguration().isCollapseDimensions() && epsilon.rank() != 2) { val origShape = epsilon.shape(); //Don't collapse dims case: error should be [minibatch, vectorSize, 1] or [minibatch, channels, 1, 1] //Reshape it to 2d, to get rid of the 1s @@ -293,7 +292,7 @@ public class GlobalPoolingLayer extends AbstractLayer stateMap) { throw new UnsupportedOperationException("Not supported: cannot RnnTimeStep bidirectional layers therefore " + @@ -255,20 +272,14 @@ public class BidirectionalLayer implements RecurrentLayer { } @Override - public Collection getListeners() { - return fwd.getListeners(); + public Collection getTrainingListeners() { + return fwd.getTrainingListeners(); } @Override - public void setListeners(TrainingListener... listeners) { - fwd.setListeners(listeners); - bwd.setListeners(listeners); - } - - @Override - public void addListeners(TrainingListener... listener) { - fwd.addListeners(listener); - bwd.addListeners(listener); + public void addTrainingListeners(TrainingListener... 
listeners) { + fwd.addTrainingListeners(listeners); + bwd.addTrainingListeners(listeners); } @Override @@ -287,8 +298,8 @@ public class BidirectionalLayer implements RecurrentLayer { } @Override - public double score() { - return fwd.score() + bwd.score(); + public double getScore() { + return fwd.getScore() + bwd.getScore(); } @Override @@ -298,14 +309,10 @@ public class BidirectionalLayer implements RecurrentLayer { } @Override - public INDArray params() { + public INDArray getModelParams() { return paramsView; } - @Override - public TrainingConfig getConfig() { - return layerConfiguration; - } @Override public long numParams() { @@ -548,9 +555,9 @@ public class BidirectionalLayer implements RecurrentLayer { //No op } - public void setListeners(Collection listeners) { - fwd.setListeners(listeners.toArray(new TrainingListener[]{})); - bwd.setListeners(listeners.toArray(new TrainingListener[]{})); + public void addTrainingListeners(Collection listeners) { + fwd.addTrainingListeners(listeners.toArray(new TrainingListener[]{})); + bwd.addTrainingListeners(listeners.toArray(new TrainingListener[]{})); } @Override @@ -708,4 +715,11 @@ public class BidirectionalLayer implements RecurrentLayer { public void close(){ //No-op for individual layers } + /** + * @return 1d parameter vector + */ + @Override + public INDArray getParams() { + throw new RuntimeException("Not implemented."); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java index ac5c57165..595dd0e2c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.layers.recurrent; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CacheMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -76,14 +75,14 @@ public class GravesBidirectionalLSTM fwdPass.fwdPassOutput = permuteIfNWC(fwdPass.fwdPassOutput); final Pair forwardsGradient = LSTMHelpers.backpropGradientHelper(this, this.layerConfiguration.getNetConfiguration(), - this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), + this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), permuteIfNWC(epsilon), truncatedBPTT, tbpttBackwardLength, fwdPass, true, GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS, GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS, gradientViews, maskArray, true, - null, workspaceMgr, layerConf().isHelperAllowFallback()); + null, workspaceMgr, getTypedLayerConfiguration().isHelperAllowFallback()); @@ -91,14 +90,14 @@ public class GravesBidirectionalLSTM final Pair backwardsGradient = LSTMHelpers.backpropGradientHelper(this, this.layerConfiguration.getNetConfiguration(), - this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), + 
this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS), permuteIfNWC(epsilon), truncatedBPTT, tbpttBackwardLength, backPass, false, GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS, GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS, GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS, gradientViews, maskArray, true, - null, workspaceMgr, layerConf().isHelperAllowFallback()); + null, workspaceMgr, getTypedLayerConfiguration().isHelperAllowFallback()); forwardsGradient.setSecond(permuteIfNWC(forwardsGradient.getSecond())); backwardsGradient.setSecond(permuteIfNWC(backwardsGradient.getSecond())); @@ -118,7 +117,7 @@ public class GravesBidirectionalLSTM final Gradient correctOrderedGradient = new DefaultGradient(); - for (final String key : paramsTable.keySet()) { + for (final String key : getParamTable().keySet()) { correctOrderedGradient.setGradientFor(key, combinedGradient.getGradientFor(key)); } @@ -156,22 +155,22 @@ public class GravesBidirectionalLSTM cachedPassForward = null; } else { - forwardsEval = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.layerConf().getGateActivationFn(), + forwardsEval = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS), training, null, null, forBackprop || (cacheMode != CacheMode.NONE && training), true, GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, maskArray, true, null, - forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr, layerConf().isHelperAllowFallback()); + forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr, getTypedLayerConfiguration().isHelperAllowFallback()); - backwardsEval = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.layerConf().getGateActivationFn(), + backwardsEval = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS), getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS), training, null, null, forBackprop || (cacheMode != CacheMode.NONE && training), false, GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS, maskArray, true, null, - forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr, layerConf().isHelperAllowFallback()); + forBackprop ? 
cacheMode : CacheMode.NONE, workspaceMgr, getTypedLayerConfiguration().isHelperAllowFallback()); forwardsEval.fwdPassOutput = permuteIfNWC(forwardsEval.fwdPassOutput); backwardsEval.fwdPassOutput = permuteIfNWC(backwardsEval.fwdPassOutput); @@ -216,10 +215,10 @@ public class GravesBidirectionalLSTM biasKey = GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS; } - FwdPassReturn ret = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), + FwdPassReturn ret = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), getParam(recurrentKey), getParam(inputKey), getParam(biasKey), training, prevOutputActivations, prevMemCellState, forBackprop, forwards, inputKey, maskArray, true, - null, forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr, layerConf().isHelperAllowFallback()); + null, forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr, getTypedLayerConfiguration().isHelperAllowFallback()); ret.fwdPassOutput = permuteIfNWC(ret.fwdPassOutput); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java index 5aedd780b..6626e927e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.layers.recurrent; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CacheMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.GravesLSTMParamInitializer; @@ -84,11 +83,11 @@ public class GravesLSTM extends BaseRecurrentLayer p = LSTMHelpers.backpropGradientHelper(this, - this.layerConfiguration.getNetConfiguration(), this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), + this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), recurrentWeights, inputWeights, permuteIfNWC(epsilon), truncatedBPTT, tbpttBackwardLength, fwdPass, true, GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, GravesLSTMParamInitializer.BIAS_KEY, gradientViews, maskArray, true, null, - workspaceMgr, layerConf().isHelperAllowFallback()); + workspaceMgr, getTypedLayerConfiguration().isHelperAllowFallback()); weightNoiseParams.clear(); p.setSecond(permuteIfNWC(backpropDropOutIfPresent(p.getSecond()))); @@ -129,11 +128,11 @@ public class GravesLSTM extends BaseRecurrentLayer p = LSTMHelpers.backpropGradientHelper(this, - getNetConfiguration(), this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), + getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), recurrentWeights, inputWeights, permuteIfNWC(epsilon), truncatedBPTT, tbpttBackwardLength, fwdPass, true, LSTMParamInitializer.INPUT_WEIGHT_KEY, LSTMParamInitializer.RECURRENT_WEIGHT_KEY, LSTMParamInitializer.BIAS_KEY, gradientViews, null, false, helper, workspaceMgr, - layerConf().isHelperAllowFallback()); + 
getTypedLayerConfiguration().isHelperAllowFallback()); weightNoiseParams.clear(); p.setSecond(permuteIfNWC(backpropDropOutIfPresent(p.getSecond()))); @@ -140,7 +139,7 @@ public class LSTM extends BaseRecurrentLayer= endIdx; iTimeIndex--) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java index de9d75928..e734212e8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java @@ -25,7 +25,6 @@ import lombok.Setter; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.api.layers.IOutputLayer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -64,7 +63,7 @@ public class RnnLossLayer extends BaseLayer { } } - org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = (org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) layerConf(); + org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = (org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) getTypedLayerConfiguration(); bl.validateInput(input); Map phMap = new HashMap<>(); @@ -104,7 +103,7 @@ public class SameDiffLayer extends AbstractLayer { if(maskArray != null){ phMap.put(MASK_KEY, maskArray); } else { - phMap.put(MASK_KEY, layerConf().onesMaskForInput(input)); + phMap.put(MASK_KEY, getTypedLayerConfiguration().onesMaskForInput(input)); } //Configure memory management for SameDiff instance - use DL4J workspaces @@ -176,7 +175,7 @@ public class SameDiffLayer extends AbstractLayer { sessionMap.get(Thread.currentThread().getId()).setMmgr(mmgr); - org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = (org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) layerConf(); + org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = (org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) getTypedLayerConfiguration(); bl.validateInput(input); Map phMap = new HashMap<>(); @@ -185,7 +184,7 @@ public class SameDiffLayer extends AbstractLayer { if(maskArray != null){ phMap.put(MASK_KEY, maskArray); } else { - phMap.put(MASK_KEY, layerConf().onesMaskForInput(input)); + phMap.put(MASK_KEY, getTypedLayerConfiguration().onesMaskForInput(input)); } List requiredGrads = new ArrayList<>(paramTable.size() + 1); @@ -215,7 +214,7 @@ public class SameDiffLayer extends AbstractLayer { * @return the parameters of the neural network */ @Override - public INDArray params() { + public INDArray getModelParams() { return params; } @@ -272,7 +271,7 @@ public class SameDiffLayer extends AbstractLayer { @Override public void setBackpropGradientsViewArray(INDArray gradients) { this.gradients = gradients; - this.gradTable = layerConf().initializer().getGradientsFromFlattened(this.getLayerConfiguration(), gradients); + this.gradTable = getTypedLayerConfiguration().initializer().getGradientsFromFlattened(this.getLayerConfiguration(), gradients); } @Override @@ -298,7 +297,7 @@ public class SameDiffLayer extends AbstractLayer { protected void doInit(){ try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = 
(org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) layerConf(); + org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = (org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) getTypedLayerConfiguration(); sameDiff = SameDiff.create(); //Use SingleThreadArrayHolder so we can use views (also don't nede multithreading here, DL4J is not thread safe) sameDiff.setArrayHolders(new SingleThreadArrayHolder(), new SingleThreadArrayHolder(), false); @@ -307,7 +306,7 @@ public class SameDiffLayer extends AbstractLayer { long[] inputShape = input.shape().clone(); inputShape[0] = -1; SDVariable inputVar = sameDiff.placeHolder(INPUT_KEY, dataType, inputShape); - Map paramShapes = layerConf().getLayerParams().getParamShapes(); + Map paramShapes = getTypedLayerConfiguration().getLayerParams().getParamShapes(); Map params = new LinkedHashMap<>(); for (String s : paramShapes.keySet()) { val ps = paramShapes.get(s); @@ -336,7 +335,7 @@ public class SameDiffLayer extends AbstractLayer { @Override public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { - org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = (org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) layerConf(); + org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = (org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) getTypedLayerConfiguration(); this.maskArray = maskArray; this.maskState = currentMaskState; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java index d3cc93049..60d4d4c7d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java @@ -25,7 +25,6 @@ import lombok.Setter; import lombok.val; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.layers.IOutputLayer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -97,7 +96,7 @@ public class SameDiffOutputLayer extends AbstractLayer phMap = new HashMap<>(); phMap.put(INPUT_KEY, input); - if(!activations && layerConf().labelsRequired() && labels != null) { + if(!activations && getTypedLayerConfiguration().labelsRequired() && labels != null) { phMap.put(LABELS_KEY, labels); } - String s = activations ? layerConf().activationsVertexName() : outputVar.name(); + String s = activations ? getTypedLayerConfiguration().activationsVertexName() : outputVar.name(); INDArray out = sameDiff.outputSingle(phMap, s); @@ -153,7 +152,7 @@ public class SameDiffOutputLayer extends AbstractLayer backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); - Preconditions.checkState(!layerConf().labelsRequired() || labels != null, "Cannot execute backprop: Labels are not set. " + + Preconditions.checkState(!getTypedLayerConfiguration().labelsRequired() || labels != null, "Cannot execute backprop: Labels are not set. 
" + "If labels are not required for this SameDiff output layer, override SameDiffOutputLayer.labelsRequired()" + " to return false instead"); Gradient g = new DefaultGradient(); @@ -228,7 +227,7 @@ public class SameDiffOutputLayer extends AbstractLayer paramShapes = layerConf().getLayerParams().getParamShapes(); + Map paramShapes = getTypedLayerConfiguration().getLayerParams().getParamShapes(); Map params = new LinkedHashMap<>(); for (String s : paramShapes.keySet()) { val ps = paramShapes.get(s); @@ -341,7 +340,7 @@ public class SameDiffOutputLayer extends AbstractLayer gradientAndScore() { - return new Pair<>(gradient(), score()); + return new Pair<>(gradient(), getScore()); } @Override @@ -146,7 +145,7 @@ public class CenterLossOutputLayer extends BaseOutputLayer getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) { - ILossFunction lossFunction = layerConf().getLossFn(); + ILossFunction lossFunction = getTypedLayerConfiguration().getLossFn(); INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM); if (labels2d.size(1) != preOut.size(1)) { throw new DL4JInvalidInputException( @@ -182,7 +181,7 @@ public class CenterLossOutputLayer extends BaseOutputLayer params; @Getter protected transient Map gradientViews; - protected double score = 0.0; protected ConvexOptimizer optimizer; protected Gradient gradient; - protected Collection trainingListeners = new ArrayList<>(); protected int index = 0; protected INDArray maskArray; protected Solver solver; - protected int[] encoderLayerSizes; protected int[] decoderLayerSizes; protected ReconstructionDistribution reconstructionDistribution; @@ -87,18 +84,15 @@ public class VariationalAutoencoder implements Layer { protected int numSamples; protected CacheMode cacheMode = CacheMode.NONE; protected DataType dataType; - protected boolean zeroedPretrainParamGradients = false; - protected Map weightNoiseParams = new HashMap<>(); - @Getter @Setter protected int iterationCount; @Getter @Setter protected int epochCount; - @Getter @Setter @NonNull private LayerConfiguration layerConfiguration; + private @Getter @Setter Collection trainingListeners; public VariationalAutoencoder(@NonNull LayerConfiguration layerConfiguration, DataType dataType) { this.layerConfiguration = layerConfiguration; @@ -119,6 +113,16 @@ public class VariationalAutoencoder implements Layer { .getNumSamples(); } + /** + * Get a reference to the network this layer is part of. 
+ * + * @return + */ + @Override + public IModel getNet() { + throw new RuntimeException("Not implemented."); + } + protected org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder layerConf() { return (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) layerConfiguration; } @@ -175,7 +179,7 @@ public class VariationalAutoencoder implements Layer { } @Override - public double score() { + public double getScore() { return score; } @@ -277,7 +281,7 @@ public class VariationalAutoencoder implements Layer { this.score += logPTheta / numSamples; //If we have any training listeners (for example, for UI StatsListener - pass on activations) - if (trainingListeners != null && !trainingListeners.isEmpty() && l == 0) { //Note: only doing this on the *first* sample + if (getTrainingConfig() != null && !getTrainingListeners().isEmpty() && l == 0) { //Note: only doing this on the *first* sample Map activations = new LinkedHashMap<>(); for (int i = 0; i < fwd.encoderActivations.length; i++) { activations.put("e" + i, fwd.encoderActivations[i]); @@ -288,9 +292,9 @@ public class VariationalAutoencoder implements Layer { } activations.put(VariationalAutoencoderParamInitializer.PXZ_PREFIX, reconstructionDistribution.generateAtMean(pxzDistributionPreOut)); - if (!trainingListeners.isEmpty()) { + if (!getTrainingListeners().isEmpty()) { try (MemoryWorkspace workspace = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - for (TrainingListener tl : trainingListeners) { + for (TrainingListener tl : getTrainingListeners()) { tl.onForwardPass(this, activations); } } @@ -495,7 +499,7 @@ public class VariationalAutoencoder implements Layer { } @Override - public INDArray params() { + public INDArray getModelParams() { return paramsFlattened; } @@ -510,8 +514,13 @@ public class VariationalAutoencoder implements Layer { } @Override - public TrainingConfig getConfig() { - return layerConfiguration; + public void setParamTable(Map paramTable) { + this.params = paramTable; + } + + @Override + public ITraininableLayerConfiguration getTrainingConfig() { + return (BaseLayerConfiguration) layerConfiguration; } @Override @@ -519,6 +528,24 @@ public class VariationalAutoencoder implements Layer { return numParams(false); } + /** + * @return 1d parameter vector + */ + @Override + public INDArray getParams() { + throw new RuntimeException("Not implemented."); + } + + @Override + public void setParams(INDArray params) { + if (params.length() != this.paramsFlattened.length()) { + throw new IllegalArgumentException("Cannot set parameters: expected parameters vector of length " + + this.paramsFlattened.length() + " but got parameters array of length " + params.length() + + " " + layerId()); + } + this.paramsFlattened.assign(params); + } + @Override public long numParams(boolean backwards) { int ret = 0; @@ -530,16 +557,6 @@ public class VariationalAutoencoder implements Layer { return ret; } - @Override - public void setParams(INDArray params) { - if (params.length() != this.paramsFlattened.length()) { - throw new IllegalArgumentException("Cannot set parameters: expected parameters vector of length " - + this.paramsFlattened.length() + " but got parameters array of length " + params.length() - + " " + layerId()); - } - this.paramsFlattened.assign(params); - } - @Override public void setParamsViewArray(INDArray params) { if (this.params != null && params.length() != numParams()) @@ -577,7 +594,7 @@ public class VariationalAutoencoder implements Layer { @Override public Pair gradientAndScore() { - 
return new Pair<>(gradient(), score()); + return new Pair<>(gradient(), getScore()); } @Override @@ -695,7 +712,6 @@ public class VariationalAutoencoder implements Layer { return params.get(param); } - @Override public Map getParamTable(boolean backpropParamsOnly) { Map map = new LinkedHashMap<>(); @@ -712,11 +728,6 @@ public class VariationalAutoencoder implements Layer { return true; } - @Override - public void setParamTable(Map paramTable) { - this.params = paramTable; - } - @Override public void setParam(String key, INDArray val) { if (getParamTable().containsKey(key)) { @@ -844,15 +855,6 @@ public class VariationalAutoencoder implements Layer { return f.pzxMeanPreOut; } - @AllArgsConstructor - @Data - private static class VAEFwdHelper { - private INDArray[] encoderPreOuts; - private INDArray pzxMeanPreOut; - private INDArray[] encoderActivations; - } - - private VAEFwdHelper doForward(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { assertInputSet(false); @@ -904,48 +906,8 @@ public class VariationalAutoencoder implements Layer { } @Override - public Collection getListeners() { - if (trainingListeners == null) { - return null; - } - - return new ArrayList<>(trainingListeners); - } - - @Override - public void setListeners(TrainingListener... listeners) { - setListeners(Arrays.asList(listeners)); - } - - public void setListeners(Collection listeners) { - if (trainingListeners == null) - trainingListeners = new ArrayList<>(); - else - trainingListeners.clear(); - if (trainingListeners == null) - trainingListeners = new ArrayList<>(); - else - trainingListeners.clear(); - - if (listeners != null && !listeners.isEmpty()) { - trainingListeners.addAll(listeners); - } - } - - - /** - * This method ADDS additional TrainingListener to existing listeners - * - * @param listeners - */ - @Override - public void addListeners(TrainingListener... 
listeners) { - if (this.trainingListeners == null) { - setListeners(listeners); - return; - } - - Collections.addAll(trainingListeners, listeners); + public int getIndex() { + return index; } @Override @@ -953,21 +915,11 @@ public class VariationalAutoencoder implements Layer { this.index = index; } - @Override - public int getIndex() { - return index; - } - @Override public void setInput(INDArray input, LayerWorkspaceMgr layerWorkspaceMgr) { this.input = input; } - @Override - public void setInputMiniBatchSize(int size) { - - } - @Override public int getInputMiniBatchSize() { if (input.size(0) > Integer.MAX_VALUE) @@ -976,8 +928,8 @@ public class VariationalAutoencoder implements Layer { } @Override - public void setMaskArray(INDArray maskArray) { - this.maskArray = maskArray; + public void setInputMiniBatchSize(int size) { + } @Override @@ -985,6 +937,11 @@ public class VariationalAutoencoder implements Layer { return maskArray; } + @Override + public void setMaskArray(INDArray maskArray) { + this.maskArray = maskArray; + } + @Override public boolean isPretrainLayer() { return true; @@ -1022,7 +979,8 @@ public class VariationalAutoencoder implements Layer { if (solver == null) { try (MemoryWorkspace workspace = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().model(this).configure(getNetConfiguration()).listeners(getListeners()).build(); + solver = new Solver.Builder().model(this).configure(getNetConfiguration()).listeners( + getTrainingListeners()).build(); } } this.optimizer = solver.getOptimizer(); @@ -1255,4 +1213,31 @@ public class VariationalAutoencoder implements Layer { public void close(){ //No-op for individual layers } + + /** + * Replace the TrainingListeners for this model + * + * @param listeners new listeners + */ + @Override + public void addTrainingListeners(TrainingListener... 
listeners) { + trainingListeners.addAll(List.of(listeners)); + } + +/** +* + * @param listeners + */ + @Override + public void addTrainingListeners(Collection listeners) { + trainingListeners.addAll(listeners); + } + + @AllArgsConstructor + @Data + private static class VAEFwdHelper { + private INDArray[] encoderPreOuts; + private INDArray pzxMeanPreOut; + private INDArray[] encoderActivations; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java index d27d9cfbb..497b08aaf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java @@ -24,11 +24,13 @@ import java.util.Collection; import java.util.Map; import lombok.Data; import lombok.NonNull; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.TrainingConfig; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; @@ -36,17 +38,104 @@ import org.deeplearning4j.nn.layers.LayerHelper; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.TrainingListener; +import org.jetbrains.annotations.NotNull; import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.dataset.api.DataSet; +import org.nd4j.linalg.dataset.api.MultiDataSet; +import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; @Data public abstract class BaseWrapperLayer extends AbstractLayer { protected Layer underlying; - public BaseWrapperLayer(@NonNull Layer underlying) { this.underlying = underlying; + this.setLayerConfiguration( underlying.getLayerConfiguration() ); + } + + @Override + public BaseLayerConfiguration getTypedLayerConfiguration() { + return (BaseLayerConfiguration) underlying.getLayerConfiguration(); + } + + /** + * This method returns updater state (if applicable), null otherwise + * + * @return + */ + @Override + public INDArray updaterState() { + return underlying.updaterState(); + } + + /** + * This method fits model with a given DataSet + * + * @param dataSet + */ + @Override + public void fit(DataSet dataSet) { +underlying.fit(dataSet); + } + + /** + * This method fits model with a given MultiDataSet + * + * @param dataSet + */ + @Override + public void fit(MultiDataSet dataSet) { +underlying.fit(dataSet); + } + + /** + * This method fits model with a given DataSetIterator + * + * @param iterator + */ + @Override + public void fit(DataSetIterator iterator) { +underlying.fit(iterator); + } + + /** + * This method fits model with a given MultiDataSetIterator + * + * @param iterator + */ + @Override + public void fit(MultiDataSetIterator iterator) { +underlying.fit(iterator); + } + + /** + * @param netConfiguration + */ + @Override + public void setNetConfiguration(@NonNull NeuralNetConfiguration 
netConfiguration) { +underlying.setNetConfiguration(netConfiguration); + } + + + /** + * Get a reference to the network this layer is part of. + * + * @return + */ + @Override + public IModel getNet() { + return underlying.getNet(); + } + + /** + * @return 1d parameter vector + */ + @Override + public INDArray getParams() { + return underlying.getParams(); } /** @@ -96,19 +185,15 @@ public abstract class BaseWrapperLayer extends AbstractLayer { return underlying.activate(input, training, workspaceMgr); } + @NotNull @Override - public Collection getListeners() { - return underlying.getListeners(); + public Collection getTrainingListeners() { + return underlying.getTrainingListeners(); } @Override - public void setListeners(TrainingListener... listeners) { - underlying.setListeners(listeners); - } - - @Override - public void addListeners(TrainingListener... listener) { - underlying.addListeners(listener); + public void addTrainingListeners(TrainingListener... listeners) { + underlying.addTrainingListeners(listeners); } @Override @@ -127,8 +212,8 @@ public abstract class BaseWrapperLayer extends AbstractLayer { } @Override - public double score() { - return underlying.score(); + public double getScore() { + return underlying.getScore(); } @Override @@ -137,8 +222,8 @@ public abstract class BaseWrapperLayer extends AbstractLayer { } @Override - public INDArray params() { - return underlying.params(); + public INDArray getModelParams() { + return underlying.getParams(); } @Override @@ -333,8 +418,8 @@ public abstract class BaseWrapperLayer extends AbstractLayer { } @Override - public TrainingConfig getConfig() { - return underlying.getConfig(); + public ITraininableLayerConfiguration getTrainingConfig() { + return underlying.getTrainingConfig(); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index 575ee27e9..2b27c0179 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -20,28 +20,17 @@ package org.deeplearning4j.nn.multilayer; - -import java.io.File; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; +import com.fasterxml.jackson.annotation.JsonIdentityInfo; +import com.fasterxml.jackson.annotation.ObjectIdGenerators; +import java.io.*; +import java.util.*; import java.util.stream.Collectors; import lombok.Getter; import lombok.NonNull; import lombok.Setter; import lombok.extern.slf4j.Slf4j; import lombok.val; +import net.brutex.ai.dnn.api.IModel; import net.brutex.ai.dnn.networks.ArtificialNeuralNetwork; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; @@ -49,30 +38,20 @@ import org.bytedeco.javacpp.Pointer; import org.deeplearning4j.datasets.iterator.MultiDataSetWrapperIterator; import org.deeplearning4j.exception.DL4JException; import org.deeplearning4j.exception.DL4JInvalidInputException; -import org.deeplearning4j.nn.api.Classifier; -import org.deeplearning4j.nn.api.FwdPassType; -import 
org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.ModelAdapter; -import org.deeplearning4j.nn.api.TrainingConfig; +import org.deeplearning4j.nn.api.*; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.api.layers.RecurrentLayer; -import org.deeplearning4j.nn.conf.BackpropType; -import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.CacheMode; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetBaseBuilderConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.RNNFormat; -import org.deeplearning4j.nn.conf.WorkspaceMode; +import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; +import org.deeplearning4j.nn.layers.BaseLayer; import org.deeplearning4j.nn.layers.FrozenLayer; import org.deeplearning4j.nn.layers.FrozenLayerWithBackprop; import org.deeplearning4j.nn.layers.LayerHelper; @@ -85,12 +64,7 @@ import org.deeplearning4j.optimize.Solver; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.optimize.solvers.accumulation.GradientsAccumulator; -import org.deeplearning4j.util.Convolution1DUtils; -import org.deeplearning4j.util.ConvolutionUtils; -import org.deeplearning4j.util.CrashReportingUtil; -import org.deeplearning4j.util.ModelSerializer; -import org.deeplearning4j.util.NetworkUtils; -import org.deeplearning4j.util.OutputLayerUtil; +import org.deeplearning4j.util.*; import org.jetbrains.annotations.NotNull; import org.nd4j.adapters.OutputAdapter; import org.nd4j.common.base.Preconditions; @@ -137,17 +111,18 @@ import org.nd4j.linalg.workspace.WorkspaceUtils; * above constitute what is known as a layer, and the transformative function is often referred to * as a unit. The intermediate states—often termed features—are used as the input into another * layer. - *

- * Through repetition of these steps, the artificial neural network learns multiple layers of
+ *
+ * Through repetition of these steps, the artificial neural network learns multiple layers of
 * non-linear features, which it then combines in a final layer to create a prediction.
- *
- * The neural network learns by generating an error signal that measures the difference between the
- * predictions of the network and the desired values and then using this error signal to change the
- * weights (or parameters) so that predictions get more accurate.
+ *
+ *
The neural network learns by generating an error signal that measures the difference between + * the predictions of the network and the desired values and then using this error signal to change + * the weights (or parameters) so that predictions get more accurate. */ @Slf4j -public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serializable, Classifier, - Layer { +@JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id") +public class MultiLayerNetwork extends ArtificialNeuralNetwork + implements Serializable, Classifier, Layer, ITrainableLayer { /** * Workspace for working memory for a single layer: forward pass and backward pass Note that this @@ -165,78 +140,79 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * every second layer */ protected static final String WS_LAYER_ACT_1 = "WS_LAYER_ACT_1"; + protected static final String WS_LAYER_ACT_2 = "WS_LAYER_ACT_2"; - /** - * Workspace for output methods that use OutputAdapter - */ + /** Workspace for output methods that use OutputAdapter */ protected static final String WS_OUTPUT_MEM = "WS_OUTPUT_MEM"; - /** - * Workspace for working memory in RNNs - opened and closed once per RNN time step - */ + /** Workspace for working memory in RNNs - opened and closed once per RNN time step */ protected static final String WS_RNN_LOOP_WORKING_MEM = "WS_RNN_LOOP_WORKING_MEM"; - protected static final WorkspaceConfiguration WS_ALL_LAYERS_ACT_CONFIG = WorkspaceConfiguration.builder() - .initialSize(0) - .overallocationLimit(0.05) - .policyLearning(LearningPolicy.FIRST_LOOP) - .policyReset(ResetPolicy.BLOCK_LEFT) - .policySpill(SpillPolicy.REALLOCATE) - .policyAllocation(AllocationPolicy.OVERALLOCATE) - .build(); - protected static final WorkspaceConfiguration WS_RNN_LOOP_WORKING_MEM_CONFIG = WorkspaceConfiguration.builder() - .initialSize(0).overallocationLimit(0.05).policyReset(ResetPolicy.BLOCK_LEFT) - .policyAllocation(AllocationPolicy.OVERALLOCATE).policySpill(SpillPolicy.REALLOCATE) - .policyLearning(LearningPolicy.FIRST_LOOP).build(); - //the hidden neural network layers (including output layer) + protected static final WorkspaceConfiguration WS_ALL_LAYERS_ACT_CONFIG = + WorkspaceConfiguration.builder() + .initialSize(0) + .overallocationLimit(0.05) + .policyLearning(LearningPolicy.FIRST_LOOP) + .policyReset(ResetPolicy.BLOCK_LEFT) + .policySpill(SpillPolicy.REALLOCATE) + .policyAllocation(AllocationPolicy.OVERALLOCATE) + .build(); + protected static final WorkspaceConfiguration WS_RNN_LOOP_WORKING_MEM_CONFIG = + WorkspaceConfiguration.builder() + .initialSize(0) + .overallocationLimit(0.05) + .policyReset(ResetPolicy.BLOCK_LEFT) + .policyAllocation(AllocationPolicy.OVERALLOCATE) + .policySpill(SpillPolicy.REALLOCATE) + .policyLearning(LearningPolicy.FIRST_LOOP) + .build(); + // the hidden neural network layers (including output layer) protected Layer[] layers; - - //Current training data: input features and labels + // Current training data: input features and labels protected INDArray input, labels; protected boolean initCalled = false; protected Collection trainingListeners = new ArrayList<>(); protected Gradient gradient; protected double score; - @Setter - protected boolean initDone = false; - protected INDArray flattenedParams; //Params for all layers are a view/subset of this array + @Setter protected boolean initDone = false; + protected INDArray flattenedParams; // Params for all layers are a view/subset of this array + @Getter - protected 
transient INDArray flattenedGradients; //Gradients for all layers are a view/subset of this array - protected boolean clearTbpttState = true; //Mainly for unit testing (should be enabled otherwise) + protected transient INDArray + flattenedGradients; // Gradients for all layers are a view/subset of this array + + protected boolean clearTbpttState = true; // Mainly for unit testing (should be enabled otherwise) protected transient ThreadLocal lastEtlTime = new ThreadLocal<>(); protected INDArray mask; - protected int layerIndex; //For LayerConfiguration.get/setIndex() - protected transient Solver solver; //Used to call optimizers during backprop - //Workspaces for CUDNN. Pass to LayerWorkspaceMgr for re-use in cudnn helpers - @Getter - protected transient Map helperWorkspaces = new HashMap<>(); + protected int layerIndex; // For LayerConfiguration.get/setIndex() + protected transient Solver solver; // Used to call optimizers during backprop + // Workspaces for CUDNN. Pass to LayerWorkspaceMgr for re-use in cudnn helpers + @Getter protected transient Map helperWorkspaces = new HashMap<>(); protected WorkspaceConfiguration WS_LAYER_WORKING_MEM_CONFIG; protected WorkspaceConfiguration WS_LAYER_ACT_X_CONFIG; - public MultiLayerNetwork(@NotNull NeuralNetConfiguration conf) { super(conf); - //Working memory: should learn over course of: (a) full forward pass, and (b) full backward pass - //Working memory should be opened once per layer and once per preprocessor, for each of forward and backward passes - int numWorkingMem = 2 * (conf.getFlattenedLayerConfigurations().size() - + conf.getInputPreProcessors().size()); + // Working memory: should learn over course of: (a) full forward pass, and (b) full backward + // pass + // Working memory should be opened once per layer and once per preprocessor, for each of forward + // and backward passes + int numWorkingMem = + 2 * (conf.getFlattenedLayerConfigurations().size() + conf.getInputPreProcessors().size()); WS_LAYER_WORKING_MEM_CONFIG = getLayerWorkingMemWSConfig(numWorkingMem); - WS_LAYER_ACT_X_CONFIG = getLayerActivationWSConfig( - conf.getFlattenedLayerConfigurations().size()); - - init(); + WS_LAYER_ACT_X_CONFIG = + getLayerActivationWSConfig(conf.getFlattenedLayerConfigurations().size()); } public MultiLayerNetwork(@NotNull NeuralNetBaseBuilderConfiguration conf) { - this(( NeuralNetConfiguration) conf); + this((NeuralNetConfiguration) conf); } - /** * Initialize the network based on the configuration (a NeuralNetConfiguration in JSON format) and * parameters array * - * @param conf the configuration json + * @param conf the configuration json * @param params the parameters for the network */ public MultiLayerNetwork(String conf, INDArray params) { @@ -248,7 +224,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Initialize the network based on the configuration and parameters array * - * @param conf the configuration + * @param conf the configuration * @param params the parameters */ public MultiLayerNetwork(NeuralNetConfiguration conf, INDArray params) { @@ -270,8 +246,10 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } protected static WorkspaceConfiguration getLayerActivationWSConfig(int numLayers) { - //Activations memory: opened once per layer - for every second layer (preprocessors are within the loop). 
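
Since the constructor hunk above drops the implicit init() call, callers presumably have to initialize the network explicitly before training. A minimal usage sketch, not part of the diff, assuming `conf` is a fully built NeuralNetConfiguration and `trainIter` a DataSetIterator:

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();                                                   // no longer invoked by the constructor shown above
    net.addTrainingListeners(new PerformanceListener(10, true));  // renamed listener API used throughout this change
    net.fit(trainIter);
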
- //Technically we could set learning to numLayers / 2, but will set to numLayers for simplicity, and also to + // Activations memory: opened once per layer - for every second layer (preprocessors are within + // the loop). + // Technically we could set learning to numLayers / 2, but will set to numLayers for simplicity, + // and also to // account for a backward pass return WorkspaceConfiguration.builder() .initialSize(0) @@ -285,19 +263,29 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Restore a MultiLayerNetwork to a file, saved using {@link #save(File)} or - * {@link ModelSerializer} + * Restore a MultiLayerNetwork to a file, saved using {@link #save(File)} or {@link + * ModelSerializer} * - * @param f File to load the network from + * @param f File to load the network from * @param loadUpdater If true: load the updater if it is available (i.e., the state array for - * momentum/Adam/rmsprop etc) - use false if no further training is - * required, or true if further training will be undertaken + * momentum/Adam/rmsprop etc) - use false if no further training is required, or + * true if further training will be undertaken * @see ModelSerializer ModelSerializer for more details (and saving/loading via streams) */ public static MultiLayerNetwork load(File f, boolean loadUpdater) throws IOException { return ModelSerializer.restoreMultiLayerNetwork(f, loadUpdater); } + /** + * Get a reference to this neural network. + * + * @return + */ + @Override + public IModel getNet() { + return this; + } + /** * Return the configuration of this layer * @@ -305,7 +293,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial */ @Override public LayerConfiguration getLayerConfiguration() { - //TODO + // TODO throw new RuntimeException( "getLayerConfiguration cannot be called on a MultiLayerNetwork. This function is here because of inheritance from Layer (which should be fixed)."); } @@ -358,9 +346,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial lastEtlTime.set(time); } - /** - * Perform layerwise pretraining for one epoch - see {@link #pretrain(DataSetIterator, int)} - */ + /** Perform layerwise pretraining for one epoch - see {@link #pretrain(DataSetIterator, int)} */ public void pretrain(DataSetIterator iter) { pretrain(iter, 1); } @@ -368,9 +354,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Perform layerwise unsupervised training on all pre-trainable layers in the network (VAEs, * Autoencoders, etc), for the specified number of epochs each. For example, if numEpochs=3, then - * layer 0 will be fit for 3 epochs, followed by layer 1 for 3 epochs, and so on.
Note that - * pretraining will be performed on one layer after the other. To perform unsupervised training on - * a single layer, use {@link #pretrainLayer(int, DataSetIterator)} + * layer 0 will be fit for 3 epochs, followed by layer 1 for 3 epochs, and so on.
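
A short usage sketch for the layerwise pretraining entry points documented in this hunk (illustrative only; `net` is assumed to be an initialized MultiLayerNetwork containing at least one pretrainable layer, and `iter` a DataSetIterator):

    net.pretrain(iter, 3);          // every pretrainable layer in turn, 3 epochs each
    net.pretrainLayer(0, iter, 3);  // a single layer; a non-pretrainable index is a no-op
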
+ * Note that pretraining will be performed on one layer after the other. To perform unsupervised + * training on a single layer, use {@link #pretrainLayer(int, DataSetIterator)} * * @param iter Training data */ @@ -384,32 +370,33 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } - /** - * Fit for one epoch - see {@link #pretrainLayer(int, DataSetIterator, int)} - */ + /** Fit for one epoch - see {@link #pretrainLayer(int, DataSetIterator, int)} */ public void pretrainLayer(int layerIdx, DataSetIterator iter) { pretrainLayer(layerIdx, iter, 1); } /** * Perform layerwise unsupervised training on a single pre-trainable layer in the network (VAEs, - * Autoencoders, etc) for the specified number of epochs
If the specified layer index (0 to - * numLayers - 1) is not a pretrainable layer, this is a no-op. + * Autoencoders, etc) for the specified number of epochs
+ * If the specified layer index (0 to numLayers - 1) is not a pretrainable layer, this is a no-op. * - * @param layerIdx Index of the layer to train (0 to numLayers-1) - * @param iter Training data + * @param layerIdx Index of the layer to train (0 to numLayers-1) + * @param iter Training data * @param numEpochs Number of epochs to fit the specified layer for */ public void pretrainLayer(int layerIdx, DataSetIterator iter, int numEpochs) { - Preconditions.checkState(numEpochs > 0, "Number of epochs (%s) must be a positive number", - numEpochs); + Preconditions.checkState( + numEpochs > 0, "Number of epochs (%s) must be a positive number", numEpochs); if (flattenedGradients == null) { initGradientsView(); } if (layerIdx >= layers.length) { throw new IllegalArgumentException( - "Cannot pretrain layer: layerIdx (" + layerIdx + ") >= numLayers (" + layers.length + "Cannot pretrain layer: layerIdx (" + + layerIdx + + ") >= numLayers (" + + layers.length + ")"); } @@ -419,8 +406,10 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } if (numEpochs > 1 && !iter.resetSupported()) { - throw new IllegalStateException("Cannot fit multiple epochs (" + numEpochs - + ") on an iterator that doesn't support resetting"); + throw new IllegalStateException( + "Cannot fit multiple epochs (" + + numEpochs + + ") on an iterator that doesn't support resetting"); } if (!iter.hasNext() && iter.resetSupported()) { @@ -447,8 +436,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Perform layerwise unsupervised training on a single pre-trainable layer in the network (VAEs, - * Autoencoders, etc)
If the specified layer index (0 to numLayers - 1) is not a pretrainable - * layer, this is a no-op. + * Autoencoders, etc)
+ * If the specified layer index (0 to numLayers - 1) is not a pretrainable layer, this is a no-op. * * @param layerIdx Index of the layer to train (0 to numLayers-1) * @param features Training data array @@ -462,7 +451,10 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } if (layerIdx >= layers.length) { throw new IllegalArgumentException( - "Cannot pretrain layer: layerIdx (" + layerIdx + ") >= numLayers (" + layers.length + "Cannot pretrain layer: layerIdx (" + + layerIdx + + ") >= numLayers (" + + layers.length + ")"); } @@ -470,11 +462,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { - workspaceMgr = LayerWorkspaceMgr.builder() - .defaultWorkspace(WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + workspaceMgr = + LayerWorkspaceMgr.builder() + .defaultWorkspace(WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_FF_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); } workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); @@ -483,15 +478,17 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return; } - //Do forward pass to the layer to be pretrained + // Do forward pass to the layer to be pretrained INDArray outputOfPrevLayer; if (layerIdx == 0) { outputOfPrevLayer = input; } else { - //Yes, this part of training - but we'll do forward psas as inference mode when doing layerwise training + // Yes, this part of training - but we'll do forward psas as inference mode when doing + // layerwise training // to effectively freeze earlier layers and not apply dropout etc - outputOfPrevLayer = outputOfLayerDetached(false, FwdPassType.STANDARD, layerIndex - 1, - features, null, null, null); + outputOfPrevLayer = + outputOfLayerDetached( + false, FwdPassType.STANDARD, layerIndex - 1, features, null, null, null); } try (MemoryWorkspace ws = workspaceMgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { @@ -500,9 +497,13 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (input.size(0) > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); } - outputOfPrevLayer = getNetConfiguration().getInputPreProcess(layerIdx) - .preProcess(outputOfPrevLayer, (int) input.size(0), - LayerWorkspaceMgr.noWorkspaces(helperWorkspaces)); + outputOfPrevLayer = + getNetConfiguration() + .getInputPreProcess(layerIdx) + .preProcess( + outputOfPrevLayer, + (int) input.size(0), + LayerWorkspaceMgr.noWorkspaces(helperWorkspaces)); } layer.fit(outputOfPrevLayer, workspaceMgr); @@ -511,9 +512,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial @Override public int batchSize() { - //In 99+% of cases, the input and labels dimension 0 size should be identical - //The only real exceptions: space to batch, and batch to space layers - //In those cases, we should base it on the labels size, as this impacts gradient calculation + // In 99+% of cases, the input and labels dimension 0 size should be identical + // The only real exceptions: space to batch, and batch to space layers + // In those cases, we should base it on the labels size, as this impacts gradient calculation if (input.size(0) > Integer.MAX_VALUE || labels.size(0) > 
Integer.MAX_VALUE) { throw new ND4JArraySizeException(); } @@ -531,12 +532,13 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Get one parameter array for the network.
In MultiLayerNetwork, parameters are keyed like - * "0_W" and "0_b" to mean "weights of layer index 0" and "biases of layer index 0" respectively. - * Numbers increment sequentially, and the suffixes ("W", "b" etc.) depend on the layer type, and - * are defined in the relevant parameter initializers for each layer.
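
A sketch of the parameter-key convention this javadoc describes (illustrative only; `net` is an assumed, initialized MultiLayerNetwork):

    INDArray w0 = net.getParam("0_W");  // weights of layer index 0
    INDArray b0 = net.getParam("0_b");  // biases of layer index 0
    w0.muli(0.5);                       // views of the network parameters (see the note below), so this scales the live weights
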
Note that the returned - * INDArrays are views of the underlying network parameters, so modifications of the returned - * arrays will impact the parameters of the network. + * Get one parameter array for the network.
+ * In MultiLayerNetwork, parameters are keyed like "0_W" and "0_b" to mean "weights of layer index + * 0" and "biases of layer index 0" respectively. Numbers increment sequentially, and the suffixes + * ("W", "b" etc.) depend on the layer type, and are defined in the relevant parameter + * initializers for each layer.
+ * Note that the returned INDArrays are views of the underlying network parameters, so + * modifications of the returned arrays will impact the parameters of the network. * * @param param the key of the parameter * @return The specified parameter array for the network @@ -544,7 +546,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial */ @Override public INDArray getParam(String param) { - //Get params for MultiLayerNetwork sub layers. + // Get params for MultiLayerNetwork sub layers. int idx = param.indexOf('_'); if (idx == -1) { throw new IllegalStateException( @@ -556,18 +558,17 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return layers[layerIdx].getParam(newKey); } - /** - * Returns a map of all parameters in the network as per {@link #getParamTable()}.
Optionally - * (with backpropParamsOnly=true) only the 'backprop' parameters are returned - that is, any - * parameters involved only in unsupervised layerwise pretraining not standard inference/backprop - * are excluded from the returned list. + * Returns a map of all parameters in the network as per {@link #getParamTable()}.
+ * Optionally (with backpropParamsOnly=true) only the 'backprop' parameters are returned - that + * is, any parameters involved only in unsupervised layerwise pretraining not standard + * inference/backprop are excluded from the returned list. * * @param backpropParamsOnly If true, return backprop params only. If false: return all params * @return Parameters for the network */ public Map paramTable(boolean backpropParamsOnly) { - //Get all parameters from all layers + // Get all parameters from all layers Map allParams = new LinkedHashMap<>(); for (int i = 0; i < layers.length; i++) { Map paramMap = layers[i].getParamTable(backpropParamsOnly); @@ -579,63 +580,25 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return allParams; } - /** - * Intended for internal use - */ + /** Intended for internal use */ @Override public boolean updaterDivideByMinibatch(String paramName) { int idx = paramName.indexOf('_'); int layerIdx = Integer.parseInt(paramName.substring(0, idx)); String subName = paramName.substring(idx + 1); - return getLayer(layerIdx).updaterDivideByMinibatch(subName); + return ((BaseLayer) getLayer(layerIdx)).updaterDivideByMinibatch(subName); } /** - * Set the parameters of the netowrk. Note that the parameter keys must match the format as - * described in {@link #getParam(String)} and {@link #getParamTable()}. Note that the values of the - * parameters used as an argument to this method are copied - i.e., it is safe to later - * modify/reuse the values in the provided paramTable without this impacting the network. - * - * @param paramTable Parameters to set - */ - @Override - public void setParamTable(Map paramTable) { - Map currParamTable = getParamTable(); - if (!currParamTable.keySet().equals(paramTable.keySet())) { - throw new IllegalArgumentException( - "Cannot set param table: parameter keys do not match.\n" + "Current: " - + currParamTable.keySet() + "\nTo set: " + paramTable.keySet()); - } - - for (String s : paramTable.keySet()) { - INDArray curr = currParamTable.get(s); - INDArray toSet = paramTable.get(s); - if (!Arrays.equals(curr.shape(), toSet.shape())) { - throw new IllegalArgumentException( - "Cannot set parameter table: parameter \"" + s + "\" shapes " - + "do not match. Current = " + Arrays.toString(curr.shape()) + ", to set = " - + Arrays.toString(toSet.shape())); - } - } - - //Now that we've checked ALL params (to avoid leaving net in half-modified state) - for (String s : paramTable.keySet()) { - INDArray curr = currParamTable.get(s); - INDArray toSet = paramTable.get(s); - curr.assign(toSet); - } - } - - /** - * Set the values of a single parameter. See {@link #setParamTable(Map)} and - * {@link #getParam(String)} for more details. + * Set the values of a single parameter. See {@link #setParamTable(Map)} and {@link + * #getParam(String)} for more details. * * @param key the key of the parameter to set * @param val the new values for the parameter */ @Override public void setParam(String key, INDArray val) { - //Set params for MultiLayerNetwork sub layers. + // Set params for MultiLayerNetwork sub layers. int idx = key.indexOf('_'); if (idx == -1) { throw new IllegalStateException( @@ -663,81 +626,108 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * network; if no parameters array is specified, parameters will be initialized randomly according * to the network configuration. * - * @param parameters Network parameter. May be null. If null: randomly initialize. 
+ * @param parameters Network parameter. May be null. If null: randomly initialize. * @param cloneParametersArray Whether the parameter array (if any) should be cloned, or used - * directly + * directly */ public void init(INDArray parameters, boolean cloneParametersArray) { if (initCalled) { + log.trace( + "Initialisation in {} has already been called. Ignoring additional call to init().", + getClass().getSimpleName()); return; } + /** + * Initialize the neural network configuration first. This also triggers inheritance of + * configuration setting where needed. + */ + getNetConfiguration().setNeuralNet(this); + getNetConfiguration() + .init(); // we cannot do this in constructor, as the config might be attached later. + DataType netDtype = getNetConfiguration().getDataType(); if (parameters != null && parameters.dataType() != netDtype) { - Preconditions.checkState(parameters.rank() == 2 && parameters.size(0) == 1, + Preconditions.checkState( + parameters.rank() == 2 && parameters.size(0) == 1, "Invalid parameters array: should be rank 2 with shape [1,numParams]. Got %ndShape", parameters); if (cloneParametersArray) { - try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { parameters = parameters.castTo(netDtype); } } else { throw new IllegalStateException( - "Error initializing network: Network datatype is set to " + netDtype - + " but provided array has datatype " + parameters.dataType() - + " with cloneParametersArray argument" + - " set to false. Cannot initialize net with specified datatype array if that array does not match network datatype"); + "Error initializing network: Network datatype is set to " + + netDtype + + " but provided array has datatype " + + parameters.dataType() + + " with cloneParametersArray argument" + + " set to false. Cannot initialize net with specified datatype array if that array does not match network datatype"); } } - + /** Set default Training and Inference Workspace modes unless set already */ if (getNetConfiguration().getTrainingWorkspaceMode() == null) { getNetConfiguration().setTrainingWorkspaceMode(WorkspaceMode.NONE); } - if (getNetConfiguration().getInferenceWorkspaceMode() == null) { getNetConfiguration().setInferenceWorkspaceMode(WorkspaceMode.NONE); } - + /** set default Cache mode, unless set already */ if (getNetConfiguration().getCacheMode() == null) { getNetConfiguration().setCacheMode(CacheMode.NONE); } - OneTimeLogger.info(log, + OneTimeLogger.info( + log, // Todo: Why not SLF4J? 
"Starting MultiLayerNetwork with WorkspaceModes set to [training: {}; inference: {}], cacheMode set to [{}]", getNetConfiguration().getTrainingWorkspaceMode(), getNetConfiguration().getInferenceWorkspaceMode(), getNetConfiguration().getCacheMode()); int nLayers = getNetConfiguration().getFlattenedLayerConfigurations().size(); - if (nLayers < 1) { throw new IllegalStateException("Unable to create network: number of layers is less than 1"); } + /** Initialize the array of Layers for this network using the number of LayerConfigurations */ if (this.layers == null || this.layers[0] == null) { if (this.layers == null) { this.layers = new Layer[nLayers]; } - //First: Work out total length of params + // First: Work out total length of params long paramLength = 0; val nParamsPerLayer = new long[nLayers]; for (int i = 0; i < nLayers; i++) { - LayerConfiguration layer_conf = getNetConfiguration().getFlattenedLayerConfigurations().get(i); - layer_conf.setDataType(netDtype); + LayerConfiguration layer_conf = + getNetConfiguration().getFlattenedLayerConfigurations().get(i); + // Test if Layer type has parameters (is inherited from BaseLayerConfiguration rather then + // LayerConfiguration + if (layer_conf instanceof BaseLayerConfiguration) + ((BaseLayerConfiguration) layer_conf).setDataType(netDtype); + nParamsPerLayer[i] = layer_conf.initializer().numParams(layer_conf); paramLength += nParamsPerLayer[i]; } + log.debug( + "Neural Network {} is initializes with a total number of {} parameters from {} layers.", + getClass().getSimpleName(), + paramLength, + nLayers); - //Create parameters array, if required + // Create parameters array, if required boolean initializeParams; if (parameters != null) { if (!parameters.isRowVectorOrScalar()) { throw new IllegalArgumentException("Invalid parameters: should be a row vector"); } if (parameters.length() != paramLength) { - throw new IllegalArgumentException("Invalid parameters: expected length " + paramLength - + ", got length " + parameters.length()); + throw new IllegalArgumentException( + "Invalid parameters: expected length " + + paramLength + + ", got length " + + parameters.length()); } if (cloneParametersArray) { @@ -751,12 +741,12 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial flattenedParams = Nd4j.create(netDtype, 1, paramLength); initializeParams = true; } else { - //Edge case: 0 params in network + // Edge case: 0 params in network flattenedParams = null; initializeParams = false; } - //Set RNG seed, for repeatability between initializations when set + // Set RNG seed, for repeatability between initializations when set if (initializeParams) { Nd4j.getRandom().setSeed(getNetConfiguration().getSeed()); } @@ -766,33 +756,43 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial for (int i = 0; i < nLayers; i++) { INDArray paramsView; if (nParamsPerLayer[i] > 0) { - paramsView = flattenedParams.get(NDArrayIndex.interval(0, 0, true), - NDArrayIndex.interval(paramCountSoFar, paramCountSoFar + nParamsPerLayer[i])); + paramsView = + flattenedParams.get( + NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(paramCountSoFar, paramCountSoFar + nParamsPerLayer[i])); } else { paramsView = null; } paramCountSoFar += nParamsPerLayer[i]; @NonNull LayerConfiguration lc = getNetConfiguration().getFlattenedLayerConfigurations().get(i); - layers[i] = lc.instantiate(lc.getNetConfiguration(), trainingListeners, i, paramsView, initializeParams, + layers[i] = + lc.instantiate( + 
lc.getNetConfiguration(), + trainingListeners, + i, + paramsView, + initializeParams, netDtype); } initCalled = true; } - //Set parameters in MultiLayerNetwork.getNetConfiguration() for later use in BaseOptimizer.setupSearchState() etc + // Set parameters in MultiLayerNetwork.getNetConfiguration() for later use in + // BaseOptimizer.setupSearchState() etc getNetConfiguration().clearNetWideVariable(); List variables = getNetConfiguration().netWideVariables(false); for (int i = 0; i < layers.length; i++) { if (layers[i] == null) { throw new IllegalStateException( - "Encountered null layer during initialization for layer " + i + - ": " + layers[i].getClass().getSimpleName() - + " initialization " + - "returned null layer?"); + "Encountered null layer during initialization for layer " + + i + + ": " + + layers[i].getClass().getSimpleName() + + " initialization " + + "returned null layer?"); } - - for (String s : layers[i].getNetConfiguration().netWideVariables()) { + for (String s : layers[i].getLayerConfiguration().getVariables()) { variables.add(i + "_" + s); } } @@ -800,14 +800,18 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial // now we init solver & optimizer if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) - .build(); + solver = + new Solver.Builder() + .configure(getNetConfiguration()) + .listeners(this.getTrainingListeners()) + .model(this) + .build(); solver.initOptimizer(); } } - //Mark that input modification is allowed. - //TODO When is it safe to NOT skip the very first layer? It's not always safe... + // Mark that input modification is allowed. + // TODO When is it safe to NOT skip the very first layer? It's not always safe... // For example dropout + iterating over List that is used for multiple epochs... for (int i = 1; i < layers.length; i++) { layers[i].allowInputModification(true); @@ -817,11 +821,12 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * This method allows you to specificy GradientsAccumulator instance to be used with this - * model
+ * This method allows you to specify a GradientsAccumulator instance to be used with this model + *
*
* PLEASE NOTE: Do not use this method unless you understand how to use GradientsAccumulator & - * updates sharing.
PLEASE NOTE: Do not use this method on standalone model + * updates sharing.
+ * PLEASE NOTE: Do not use this method on standalone model * * @param accumulator Gradient accumulator to use for the network */ @@ -832,8 +837,12 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) - .build(); + solver = + new Solver.Builder() + .configure(getNetConfiguration()) + .listeners(this.getTrainingListeners()) + .model(this) + .build(); } } @@ -857,38 +866,49 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial int nLayers = layers.length; - //First: Work out total length of params + // First: Work out total length of params long paramLength = 0; val nParamsPerLayer = new long[nLayers]; for (int i = 0; i < nLayers; i++) { - LayerConfiguration layerConfiguration = getNetConfiguration().getFlattenedLayerConfigurations().get(i); - nParamsPerLayer[i] = layerConfiguration.initializer().numParams(layerConfiguration); //TODO better initialisation + LayerConfiguration layerConfiguration = + getNetConfiguration().getFlattenedLayerConfigurations().get(i); + nParamsPerLayer[i] = + layerConfiguration + .initializer() + .numParams(layerConfiguration); // TODO better initialisation paramLength += nParamsPerLayer[i]; } if (paramLength > 0) { - flattenedGradients = Nd4j.create(flattenedParams.dataType(), new long[]{1, paramLength}, - 'f'); //No need to initialize, as each layer will do it each iteration anyway + flattenedGradients = + Nd4j.create( + flattenedParams.dataType(), + new long[] {1, paramLength}, + 'f'); // No need to initialize, as each layer will do it each iteration anyway } long paramsSoFar = 0; for (int i = 0; i < layers.length; i++) { if (nParamsPerLayer[i] == 0) { - continue; //This layer doesn't have any parameters... + continue; // This layer doesn't have any parameters... } - INDArray thisLayerGradView = flattenedGradients.get(NDArrayIndex.interval(0, 0, true), - NDArrayIndex.interval(paramsSoFar, paramsSoFar + nParamsPerLayer[i])); + INDArray thisLayerGradView = + flattenedGradients.get( + NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(paramsSoFar, paramsSoFar + nParamsPerLayer[i])); layers[i].setBackpropGradientsViewArray(thisLayerGradView); paramsSoFar += nParamsPerLayer[i]; } } } - protected INDArray activationFromPrevLayer(int curr, INDArray input, boolean training, - LayerWorkspaceMgr mgr) { + protected INDArray activationFromPrevLayer( + int curr, INDArray input, boolean training, LayerWorkspaceMgr mgr) { if (getNetConfiguration().getInputPreProcess(curr) != null) { - input = getNetConfiguration().getInputPreProcess(curr) - .preProcess(input, getInputMiniBatchSize(), mgr); + input = + getNetConfiguration() + .getInputPreProcess(curr) + .preProcess(input, getInputMiniBatchSize(), mgr); } INDArray ret = layers[curr].activate(input, training, mgr); @@ -897,12 +917,12 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Calculate activation for few layers at once. Suitable for autoencoder partial activation. - *

- * In example: in 10-layer deep autoencoder, layers 0 - 4 inclusive are used for encoding part, + * + *

In example: in 10-layer deep autoencoder, layers 0 - 4 inclusive are used for encoding part, * and layers 5-9 inclusive are used for decoding part. * * @param from first layer to be activated, inclusive - * @param to last layer to be activated, inclusive + * @param to last layer to be activated, inclusive * @return the activation from the last layer */ public INDArray activateSelectedLayers(int from, int to, INDArray input) { @@ -917,7 +937,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } try { - LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces(helperWorkspaces); //TODO + LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces(helperWorkspaces); // TODO INDArray res = input; for (int l = from; l <= to; l++) { @@ -936,8 +956,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * activations of layer 0, and so on. * * @param train Training: if true, perform forward pass/inference at training time. Usually, - * inference is performed with train = false. This impacts whether dropout etc is - * applied or not. + * inference is performed with train = false. This impacts whether dropout etc is applied or + * not. * @return The list of activations for each layer, including the input */ public List feedForward(INDArray input, boolean train) { @@ -946,16 +966,16 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Compute activations from input to output of the output layer. As per - * {@link #feedForward(INDArray, boolean)} but using the inputs that have previously been set - * using {@link #setInput(INDArray)} + * Compute activations from input to output of the output layer. As per {@link + * #feedForward(INDArray, boolean)} but using the inputs that have previously been set using + * {@link #setInput(INDArray)} * * @return the list of activations for each layer */ public List feedForward(boolean train) { try { - return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layers.length - 1, - input, mask, null, true); + return ffToLayerActivationsDetached( + train, FwdPassType.STANDARD, false, layers.length - 1, input, mask, null, true); } catch (OutOfMemoryError e) { CrashReportingUtil.writeMemoryCrashDump(this, e); throw e; @@ -963,21 +983,21 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Perform feed-forward, optionally (not) clearing the layer input arrays.
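As an illustration of the encoder/decoder split described for activateSelectedLayers(...) above, a sketch assuming the 10-layer autoencoder from the javadoc's example (layers 0-4 encode, 5-9 decode); names are illustrative and it is not part of the patch:

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;

public class AutoencoderSplitSketch {
  // Run only part of the network: encode with layers 0..4, then decode with layers 5..9.
  static INDArray roundTrip(MultiLayerNetwork autoencoder, INDArray features) {
    INDArray code = autoencoder.activateSelectedLayers(0, 4, features);       // encoder half
    INDArray reconstruction = autoencoder.activateSelectedLayers(5, 9, code); // decoder half
    return reconstruction;
  }
}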
Note: when using - * clearInputs=false, there can be some performance and memory overhead: this is because the - * arrays are defined outside of workspaces (which are enabled by default) - otherwise, - * old/invalidated arrays could still be accessed after calling this method. Consequently: Don't - * use clearInputs=false unless you have a use case that requires them to remain after - * feed-forward has been completed + * Perform feed-forward, optionally (not) clearing the layer input arrays.
+ * Note: when using clearInputs=false, there can be some performance and memory overhead: this is + * because the arrays are defined outside of workspaces (which are enabled by default) - + * otherwise, old/invalidated arrays could still be accessed after calling this method. + * Consequently: Don't use clearInputs=false unless you have a use case that requires them to + * remain after feed-forward has been completed * - * @param train training mode (true) or test mode (false) + * @param train training mode (true) or test mode (false) * @param clearInputs If false: don't clear the layer inputs * @return Activations from feed-forward */ public List feedForward(boolean train, boolean clearInputs) { try { - return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layers.length - 1, - input, mask, null, clearInputs); + return ffToLayerActivationsDetached( + train, FwdPassType.STANDARD, false, layers.length - 1, input, mask, null, clearInputs); } catch (OutOfMemoryError e) { CrashReportingUtil.writeMemoryCrashDump(this, e); throw e; @@ -985,21 +1005,20 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Compute the activations from the input to the specified layer.
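A short sketch of the feedForward overloads documented just above (illustrative only, assuming an initialized network); it shows the layout of the returned activation list and why clearInputs=false should be the exception rather than the rule:

import java.util.List;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;

public class FeedForwardSketch {
  static void run(MultiLayerNetwork net, INDArray features) {
    // Inference-mode forward pass; index 0 is the input, index i+1 the activations of layer i.
    List<INDArray> acts = net.feedForward(features, false);
    INDArray output = acts.get(acts.size() - 1);

    // Keeping the per-layer inputs around (clearInputs=false) trades memory for later reuse;
    // as the javadoc warns, those arrays then live outside the usual workspaces.
    net.setInput(features);
    List<INDArray> actsKeepInputs = net.feedForward(false, false);
  }
}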
To compute activations for - * all layers, use feedForward(...) methods
Note: output list includes the original input. So - * list.get(0) is always the original input, and list.get(i+1) is the activations of the ith - * layer. + * Compute the activations from the input to the specified layer.
+ * To compute activations for all layers, use feedForward(...) methods
+ * Note: output list includes the original input. So list.get(0) is always the original input, and + * list.get(i+1) is the activations of the ith layer. * * @param layerNum Index of the last layer to calculate activations for. Layers are zero-indexed. - * feedForwardToLayer(i,input) will return the activations for layers 0..i - * (inclusive) - * @param input Input to the network + * feedForwardToLayer(i,input) will return the activations for layers 0..i (inclusive) + * @param input Input to the network * @return list of activations. */ public List feedForwardToLayer(int layerNum, INDArray input) { try { - return ffToLayerActivationsDetached(false, FwdPassType.STANDARD, false, layerNum, input, mask, - null, true); + return ffToLayerActivationsDetached( + false, FwdPassType.STANDARD, false, layerNum, input, mask, null, true); } catch (OutOfMemoryError e) { CrashReportingUtil.writeMemoryCrashDump(this, e); throw e; @@ -1007,24 +1026,22 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Compute the activations from the input to the specified layer.
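And a matching sketch for feedForwardToLayer(...) (illustrative only; assumes the probed network has at least three layers):

import java.util.List;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;

public class FeedForwardToLayerSketch {
  // Activations of layers 0..2 only; acts.get(0) is the input, acts.get(3) is layer 2's output.
  static INDArray activationsOfLayer2(MultiLayerNetwork net, INDArray features) {
    List<INDArray> acts = net.feedForwardToLayer(2, features);
    return acts.get(3);
  }
}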
To compute activations for - * all layers, use feedForward(...) methods
Note: output list includes the original input. So - * list.get(0) is always the original input, and list.get(i+1) is the activations of the ith - * layer. + * Compute the activations from the input to the specified layer.
+ * To compute activations for all layers, use feedForward(...) methods
+ * Note: output list includes the original input. So list.get(0) is always the original input, and + * list.get(i+1) is the activations of the ith layer. * * @param layerNum Index of the last layer to calculate activations for. Layers are zero-indexed. - * feedForwardToLayer(i,input) will return the activations for layers 0..i - * (inclusive) - * @param input Input to the network - * @param train true for training, false for test (i.e., false if using network after - * training) + * feedForwardToLayer(i,input) will return the activations for layers 0..i (inclusive) + * @param input Input to the network + * @param train true for training, false for test (i.e., false if using network after training) * @return list of activations. */ public List feedForwardToLayer(int layerNum, INDArray input, boolean train) { try { int layerVertexIdx = layers[layerNum].getIndex(); - return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layerVertexIdx, input, - mask, null, true); + return ffToLayerActivationsDetached( + train, FwdPassType.STANDARD, false, layerVertexIdx, input, mask, null, true); } catch (OutOfMemoryError e) { CrashReportingUtil.writeMemoryCrashDump(this, e); throw e; @@ -1033,30 +1050,33 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Compute the activations from the input to the specified layer, using the currently set input - * for the network.
To compute activations for all layers, use feedForward(...) methods
+ * for the network.
+ * To compute activations for all layers, use feedForward(...) methods
* Note: output list includes the original input. So list.get(0) is always the original input, and * list.get(i+1) is the activations of the ith layer. * * @param layerNum Index of the last layer to calculate activations for. Layers are zero-indexed. - * feedForwardToLayer(i,input) will return the activations for layers 0..i - * (inclusive) - * @param train true for training, false for test (i.e., false if using network after - * training) + * feedForwardToLayer(i,input) will return the activations for layers 0..i (inclusive) + * @param train true for training, false for test (i.e., false if using network after training) * @return list of activations. */ public List feedForwardToLayer(int layerNum, boolean train) { try { - return ffToLayerActivationsDetached(train, FwdPassType.STANDARD, false, layerNum, input, mask, - null, true); + return ffToLayerActivationsDetached( + train, FwdPassType.STANDARD, false, layerNum, input, mask, null, true); } catch (OutOfMemoryError e) { CrashReportingUtil.writeMemoryCrashDump(this, e); throw e; } } - protected void validateArrayWorkspaces(LayerWorkspaceMgr mgr, INDArray array, ArrayType arrayType, + protected void validateArrayWorkspaces( + @NonNull LayerWorkspaceMgr mgr, + @NonNull INDArray array, + @NonNull ArrayType arrayType, int layerIdx, - boolean isPreprocessor, String op) { + boolean isPreprocessor, + String op) { try { mgr.validateArrayLocation(arrayType, array, false, layerIdx > 0); } catch (ND4JWorkspaceException e) { @@ -1068,11 +1088,17 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial clazz = layers[layerIdx].getClass().getName(); } throw new IllegalStateException( - op + ": array (" + arrayType + ") workspace validation failed (" + - (isPreprocessor ? "preprocessor" : "layer ") + layerIdx + (layerName != null ? - " - layer name \"" + - layerName + "\"" : "") + " - class: " + clazz - + ") - array is defined in incorrect workspace", e); + op + + ": array (" + + arrayType + + ") workspace validation failed (" + + (isPreprocessor ? "preprocessor" : "layer ") + + layerIdx + + (layerName != null ? " - layer name \"" + layerName + "\"" : "") + + " - class: " + + clazz + + ") - array is defined in incorrect workspace", + e); } } @@ -1081,46 +1107,54 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * workspace. Note that no workspace should be active externally when calling this method (an * exception will be thrown if a workspace is open externally) * - * @param train Training mode (true) or test/inference mode (false) - * @param fwdPassType Type of forward pass to perform (STANDARD or - * RNN_ACTIVATE_WITH_STORED_STATE only) - * @param storeLastForTBPTT ONLY used if fwdPassType == - * FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE - * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use - * numLayers-1 - * @param input Input to the network - * @param fMask Feature mask array. May be null. - * @param lMask Label mask array. May be null. - * @param clearInputs Whether the layer inputs should be cleared + * @param train Training mode (true) or test/inference mode (false) + * @param fwdPassType Type of forward pass to perform (STANDARD or RNN_ACTIVATE_WITH_STORED_STATE + * only) + * @param storeLastForTBPTT ONLY used if fwdPassType == FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE + * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use numLayers-1 + * @param input Input to the network + * @param fMask Feature mask array. May be null. 
+ * @param lMask Label mask array. May be null. + * @param clearInputs Whether the layer inputs should be cleared * @return List of activations (including the input), detached from any workspace */ - protected synchronized List ffToLayerActivationsDetached(boolean train, + protected synchronized List ffToLayerActivationsDetached( + boolean train, @NonNull FwdPassType fwdPassType, - boolean storeLastForTBPTT, int layerIndex, @NonNull INDArray input, - INDArray fMask, INDArray lMask, boolean clearInputs) { + boolean storeLastForTBPTT, + int layerIndex, + @NonNull INDArray input, + INDArray fMask, + INDArray lMask, + boolean clearInputs) { setInput(input); setLayerMaskArrays(fMask, lMask); - //Verify that no workspace is open externally + // Verify that no workspace is open externally WorkspaceUtils.assertNoWorkspacesOpen( "Expected no workspace active in ffToLayerActivationsDetached"); LayerWorkspaceMgr workspaceMgr; - WorkspaceMode wsm = (train ? getNetConfiguration().getTrainingWorkspaceMode() - : getNetConfiguration().getInferenceWorkspaceMode()); + WorkspaceMode wsm = + (train + ? getNetConfiguration().getTrainingWorkspaceMode() + : getNetConfiguration().getInferenceWorkspaceMode()); if (wsm == WorkspaceMode.NONE) { workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { - workspaceMgr = LayerWorkspaceMgr.builder() - .noWorkspaceFor(ArrayType.ACTIVATIONS) - .with(ArrayType.INPUT, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + workspaceMgr = + LayerWorkspaceMgr.builder() + .noWorkspaceFor(ArrayType.ACTIVATIONS) + .with(ArrayType.INPUT, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_FF_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); if (input.isAttached()) { - //Don't leverage out of async DataSetIterator workspaces + // Don't leverage out of async DataSetIterator workspaces workspaceMgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); } @@ -1131,17 +1165,26 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); List out = new ArrayList<>(); - out.add(workspaceMgr.leverageTo(ArrayType.INPUT, - input)); //Should be unnecessary (and no op), if layer is implemented correctly + out.add( + workspaceMgr.leverageTo( + ArrayType.INPUT, + input)); // Should be unnecessary (and no op), if layer is implemented correctly for (int i = 0; i <= layerIndex; i++) { - try (MemoryWorkspace wsFFWorking = workspaceMgr.notifyScopeEntered( - ArrayType.FF_WORKING_MEM)) { + try (MemoryWorkspace wsFFWorking = + workspaceMgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { if (getNetConfiguration().getInputPreProcess(i) != null) { - input = getNetConfiguration().getInputPreProcess(i) - .preProcess(input, getInputMiniBatchSize(), workspaceMgr); - //Validation: Exception if invalid (bad preprocessor implementation) - validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, true, + input = + getNetConfiguration() + .getInputPreProcess(i) + .preProcess(input, getInputMiniBatchSize(), workspaceMgr); + // Validation: Exception if invalid (bad preprocessor implementation) + validateArrayWorkspaces( + workspaceMgr, + input, + ArrayType.ACTIVATIONS, + 
i, + true, "Feed forward to layer (inference)"); } @@ -1149,15 +1192,17 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial input = layers[i].activate(input, train, workspaceMgr); } else if (fwdPassType == FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE) { if (layers[i] instanceof RecurrentLayer) { - input = ((RecurrentLayer) layers[i]).rnnActivateUsingStoredState(input, train, - storeLastForTBPTT, workspaceMgr); + input = + ((RecurrentLayer) layers[i]) + .rnnActivateUsingStoredState(input, train, storeLastForTBPTT, workspaceMgr); } else if (layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer) layers[i]).getUnderlying() instanceof RecurrentLayer) { RecurrentLayer rl = (RecurrentLayer) ((BaseWrapperLayer) layers[i]).getUnderlying(); input = rl.rnnActivateUsingStoredState(input, train, storeLastForTBPTT, workspaceMgr); } else if (layers[i] instanceof MultiLayerNetwork) { - List temp = ((MultiLayerNetwork) layers[i]).rnnActivateUsingStoredState(input, - train, storeLastForTBPTT); + List temp = + ((MultiLayerNetwork) layers[i]) + .rnnActivateUsingStoredState(input, train, storeLastForTBPTT); input = temp.get(temp.size() - 1); } else { input = layers[i].activate(input, train, workspaceMgr); @@ -1167,8 +1212,13 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial "Forward pass type not supported for this method: " + fwdPassType); } - //Validation: Exception if invalid (bad layer implementation) - validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, false, + // Validation: Exception if invalid (bad layer implementation) + validateArrayWorkspaces( + workspaceMgr, + input, + ArrayType.ACTIVATIONS, + i, + false, "Feed forward to layer (inference)"); out.add(input); @@ -1184,25 +1234,29 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Feed-forward through the network at training time - returning a list of all activations in a * workspace (WS_ALL_LAYERS_ACT) if workspaces are enabled for training; or detached if no - * workspaces are used.
Note: if using workspaces for training, this method requires that - * WS_ALL_LAYERS_ACT is open externally.
If using NO workspaces, requires that no external - * workspace is open
Note that this method does NOT clear the inputs to each layer - instead, - * they are in the WS_ALL_LAYERS_ACT workspace for use in later backprop. + * workspaces are used.
+ * Note: if using workspaces for training, this method requires that WS_ALL_LAYERS_ACT is open + * externally.
+ * If using NO workspaces, requires that no external workspace is open
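ffToLayerActivationsInWs(...) is internal, but the workspace behaviour it documents is driven by the configuration setters that appear elsewhere in this patch. A hedged sketch of switching workspaces off for debugging, assuming the getNetConfiguration()/WorkspaceMode API used in this diff:

import org.deeplearning4j.nn.conf.WorkspaceMode;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;

public class WorkspaceModeSketch {
  // With WorkspaceMode.NONE the code path below falls back to LayerWorkspaceMgr.noWorkspaces(),
  // which is slower but simpler to reason about when chasing workspace validation errors.
  static void disableWorkspaces(MultiLayerNetwork net) {
    net.getNetConfiguration().setTrainingWorkspaceMode(WorkspaceMode.NONE);
    net.getNetConfiguration().setInferenceWorkspaceMode(WorkspaceMode.NONE);
  }
}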
+ * Note that this method does NOT clear the inputs to each layer - instead, they are in the + * WS_ALL_LAYERS_ACT workspace for use in later backprop. * - * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use - * numLayers-1 - * @param fwdPassType Type of forward pass to perform (STANDARD or - * RNN_ACTIVATE_WITH_STORED_STATE only) - * @param storeLastForTBPTT ONLY used if fwdPassType == - * FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE - * @param input Input to network - * @param fMask Feature mask array. May be null - * @param lMask Label mask aray. May be null. + * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use numLayers-1 + * @param fwdPassType Type of forward pass to perform (STANDARD or RNN_ACTIVATE_WITH_STORED_STATE + * only) + * @param storeLastForTBPTT ONLY used if fwdPassType == FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE + * @param input Input to network + * @param fMask Feature mask array. May be null + * @param lMask Label mask aray. May be null. * @return */ - protected synchronized List ffToLayerActivationsInWs(int layerIndex, - @NonNull FwdPassType fwdPassType, boolean storeLastForTBPTT, - @NonNull INDArray input, INDArray fMask, INDArray lMask) { + protected synchronized List ffToLayerActivationsInWs( + int layerIndex, + @NonNull FwdPassType fwdPassType, + boolean storeLastForTBPTT, + @NonNull INDArray input, + INDArray fMask, + INDArray lMask) { setInput(input); setLayerMaskArrays(fMask, lMask); @@ -1212,44 +1266,55 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial "Expected no workspace active in ffToLayerActivationsInWs when training workspace is set to NONE"); workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { - workspaceMgr = LayerWorkspaceMgr.builder() - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + workspaceMgr = + LayerWorkspaceMgr.builder() + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_FF_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); if (input.isAttached()) { - //Don't leverage out of async DataSetIterator workspaces + // Don't leverage out of async DataSetIterator workspaces workspaceMgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); } if (getNetConfiguration().getCacheMode() != CacheMode.NONE) { - //For now: store cache mode activations in activations workspace + // For now: store cache mode activations in activations workspace workspaceMgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); - workspaceMgr.setWorkspace(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, - WS_LAYER_WORKING_MEM_CONFIG); + workspaceMgr.setWorkspace( + ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG); } - WorkspaceUtils.assertOpenAndActive(WS_ALL_LAYERS_ACT, + WorkspaceUtils.assertOpenAndActive( + WS_ALL_LAYERS_ACT, "ffToLayerActivationsInWs method requires workspace WS_ALL_LAYERS_ACT to be open"); } workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); List out = 
new ArrayList<>(); - out.add(workspaceMgr.leverageTo(ArrayType.INPUT, input)); //Probably unnecessary usually + out.add(workspaceMgr.leverageTo(ArrayType.INPUT, input)); // Probably unnecessary usually boolean traceLog = log.isTraceEnabled(); for (int i = 0; i <= layerIndex; i++) { - try (MemoryWorkspace wsFFWorking = workspaceMgr.notifyScopeEntered( - ArrayType.FF_WORKING_MEM)) { + try (MemoryWorkspace wsFFWorking = + workspaceMgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { if (getNetConfiguration().getInputPreProcess(i) != null) { - input = getNetConfiguration().getInputPreProcess(i) - .preProcess(input, getInputMiniBatchSize(), workspaceMgr); - //Validation: Exception if invalid (bad preprocessor implementation) - validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, true, + input = + getNetConfiguration() + .getInputPreProcess(i) + .preProcess(input, getInputMiniBatchSize(), workspaceMgr); + // Validation: Exception if invalid (bad preprocessor implementation) + validateArrayWorkspaces( + workspaceMgr, + input, + ArrayType.ACTIVATIONS, + i, + true, "Feed forward to layer (training)"); } @@ -1261,15 +1326,17 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial input = layers[i].activate(input, true, workspaceMgr); } else if (fwdPassType == FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE) { if (layers[i] instanceof RecurrentLayer) { - input = ((RecurrentLayer) layers[i]).rnnActivateUsingStoredState(input, true, - storeLastForTBPTT, workspaceMgr); + input = + ((RecurrentLayer) layers[i]) + .rnnActivateUsingStoredState(input, true, storeLastForTBPTT, workspaceMgr); } else if (layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer) layers[i]).getUnderlying() instanceof RecurrentLayer) { RecurrentLayer rl = (RecurrentLayer) ((BaseWrapperLayer) layers[i]).getUnderlying(); input = rl.rnnActivateUsingStoredState(input, true, storeLastForTBPTT, workspaceMgr); } else if (layers[i] instanceof MultiLayerNetwork) { - List temp = ((MultiLayerNetwork) layers[i]).rnnActivateUsingStoredState(input, - true, storeLastForTBPTT); + List temp = + ((MultiLayerNetwork) layers[i]) + .rnnActivateUsingStoredState(input, true, storeLastForTBPTT); input = temp.get(temp.size() - 1); } else { input = layers[i].activate(input, true, workspaceMgr); @@ -1283,10 +1350,27 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial throw new IllegalStateException("LayerConfiguration " + i + " returned null activations"); } - //Validation: Exception if invalid (bad layer implementation) - validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, false, + // Validation: Exception if invalid (bad layer implementation) + validateArrayWorkspaces( + workspaceMgr, + input, + ArrayType.ACTIVATIONS, + i, + false, "Feed forward to layer (training)"); - validateArrayWorkspaces(workspaceMgr, layers[i].input(), ArrayType.INPUT, i, false, + if (layers[i].input() == null) { + log.error( + "Input for layer {} at index {} cannot be null.", + layers[i].getLayerConfiguration().getLayerName(), + i); + throw new RuntimeException("Layer input is null."); + } + validateArrayWorkspaces( + workspaceMgr, + layers[i].input(), + ArrayType.INPUT, + i, + false, "Feed forward to layer (training)"); out.add(input); @@ -1302,92 +1386,110 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Provide the output of the specified layer, detached from any workspace. 
This is most commonly - * used at inference/test time, and is more memory efficient than - * {@link #ffToLayerActivationsDetached(boolean, FwdPassType, boolean, int, INDArray, INDArray, - * INDArray, boolean)} and - * {@link #ffToLayerActivationsInWs(int, FwdPassType, boolean, INDArray, INDArray, INDArray)}.
+ * used at inference/test time, and is more memory efficient than {@link + * #ffToLayerActivationsDetached(boolean, FwdPassType, boolean, int, INDArray, INDArray, INDArray, + * boolean)} and {@link #ffToLayerActivationsInWs(int, FwdPassType, boolean, INDArray, INDArray, + * INDArray)}.
* This method clears all layer inputs. - *

- * NOTE: in general, no workspaces should be activated externally for this method! This method + * + *

NOTE: in general, no workspaces should be activated externally for this method! This method * handles the workspace activation as required * - * @param train Training mode (true) or test/inference mode (false) - * @param fwdPassType Type of forward pass to perform (STANDARD, RNN_TIMESTEP or - * RNN_ACTIVATE_WITH_STORED_STATE) - * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use - * numLayers-1 - * @param input Input to the network - * @param featureMask Input/feature mask array. May be null. - * @param labelsMask Labels mask array. May be null + * @param train Training mode (true) or test/inference mode (false) + * @param fwdPassType Type of forward pass to perform (STANDARD, RNN_TIMESTEP or + * RNN_ACTIVATE_WITH_STORED_STATE) + * @param layerIndex Index (inclusive) to stop forward pass at. For all layers, use numLayers-1 + * @param input Input to the network + * @param featureMask Input/feature mask array. May be null. + * @param labelsMask Labels mask array. May be null * @param outputWorkspace Optional - if provided, outputs should be placed in this workspace. - * NOTE: this workspace must be open + * NOTE: this workspace must be open * @return Output of the specified layer, detached from any workspace */ - protected INDArray outputOfLayerDetached(boolean train, @NonNull FwdPassType fwdPassType, - int layerIndex, @NonNull INDArray input, - INDArray featureMask, INDArray labelsMask, MemoryWorkspace outputWorkspace) { + protected INDArray outputOfLayerDetached( + boolean train, + @NonNull FwdPassType fwdPassType, + int layerIndex, + @NonNull INDArray input, + INDArray featureMask, + INDArray labelsMask, + MemoryWorkspace outputWorkspace) { setInput(input); setLayerMaskArrays(featureMask, labelsMask); - /* - Idea here: we want to minimize memory, and return only the final array - Approach to do this: keep activations in memory only as long as we need them. - In MultiLayerNetwork, the output activations of layer X are used as input to layer X+1 - Which means: the workspace for layer X has to be open for both layers X and X+1 forward pass. + /* + Idea here: we want to minimize memory, and return only the final array + Approach to do this: keep activations in memory only as long as we need them. + In MultiLayerNetwork, the output activations of layer X are used as input to layer X+1 + Which means: the workspace for layer X has to be open for both layers X and X+1 forward pass. - Here, we'll use two workspaces for activations: - 1. For even index layers, activations WS that opens on start of even layer fwd pass, closes at end of odd layer fwd pass - 2. For odd index layers, activations WS that opens on start of odd layer fwd pass, closes at end of even layer fwd pass + Here, we'll use two workspaces for activations: + 1. For even index layers, activations WS that opens on start of even layer fwd pass, closes at end of odd layer fwd pass + 2. 
For odd index layers, activations WS that opens on start of odd layer fwd pass, closes at end of even layer fwd pass - Additionally, we'll reconfigure the workspace manager for the *final* layer, so that we don't have to detach - */ + Additionally, we'll reconfigure the workspace manager for the *final* layer, so that we don't have to detach + */ if (outputWorkspace == null || outputWorkspace instanceof DummyWorkspace) { - WorkspaceUtils.assertNoWorkspacesOpen("Expected no workspace active in outputOfLayerDetached", - true); + WorkspaceUtils.assertNoWorkspacesOpen( + "Expected no workspace active in outputOfLayerDetached", true); } else { - Preconditions.checkState(outputWorkspace.isScopeActive(), - "Workspace \"" + outputWorkspace.getId() + - "\" was provided for the network/layer outputs. When provided, this workspace must be opened before " - + - "calling the output method; furthermore, closing the workspace is the responsibility of the user"); + Preconditions.checkState( + outputWorkspace.isScopeActive(), + "Workspace \"" + + outputWorkspace.getId() + + "\" was provided for the network/layer outputs. When provided, this workspace must be opened before " + + "calling the output method; furthermore, closing the workspace is the responsibility of the user"); } LayerWorkspaceMgr mgrEven; LayerWorkspaceMgr mgrOdd; - WorkspaceMode wsm = train ? getNetConfiguration().getTrainingWorkspaceMode() - : getNetConfiguration().getInferenceWorkspaceMode(); + WorkspaceMode wsm = + train + ? getNetConfiguration().getTrainingWorkspaceMode() + : getNetConfiguration().getInferenceWorkspaceMode(); if (wsm == WorkspaceMode.NONE) { mgrEven = LayerWorkspaceMgr.noWorkspaces(); mgrOdd = mgrEven; - //Check for external workspace - doesn't make sense to have one with workspace mode NONE + // Check for external workspace - doesn't make sense to have one with workspace mode NONE if (outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)) { - throw new IllegalStateException("Workspace \"" + outputWorkspace.getId() + - "\" was provided for the network/layer outputs, however " + (train ? "training" - : "inference") + - " workspace mode is set to NONE. Cannot put output activations into the specified workspace if" - + - "workspaces are disabled for the network. use getNetConfiguration().setTraining/InferenceWorkspaceMode(WorkspaceMode.ENABLED)"); + throw new IllegalStateException( + "Workspace \"" + + outputWorkspace.getId() + + "\" was provided for the network/layer outputs, however " + + (train ? "training" : "inference") + + " workspace mode is set to NONE. Cannot put output activations into the specified workspace if" + + "workspaces are disabled for the network. 
use getNetConfiguration().setTraining/InferenceWorkspaceMode(WorkspaceMode.ENABLED)"); } } else { - mgrEven = LayerWorkspaceMgr.builder() - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_LAYER_ACT_1, WS_LAYER_ACT_X_CONFIG) - .with(ArrayType.INPUT, WS_LAYER_ACT_2, - WS_LAYER_ACT_X_CONFIG) //Inputs should always be in the previous WS - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + mgrEven = + LayerWorkspaceMgr.builder() + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_LAYER_ACT_1, WS_LAYER_ACT_X_CONFIG) + .with( + ArrayType.INPUT, + WS_LAYER_ACT_2, + WS_LAYER_ACT_X_CONFIG) // Inputs should always be in the previous WS + .with( + ArrayType.RNN_FF_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); - mgrOdd = LayerWorkspaceMgr.builder() - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG) - .with(ArrayType.INPUT, WS_LAYER_ACT_1, - WS_LAYER_ACT_X_CONFIG) //Inputs should always be in the previous WS - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + mgrOdd = + LayerWorkspaceMgr.builder() + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG) + .with( + ArrayType.INPUT, + WS_LAYER_ACT_1, + WS_LAYER_ACT_X_CONFIG) // Inputs should always be in the previous WS + .with( + ArrayType.RNN_FF_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); } mgrEven.setHelperWorkspacePointers(helperWorkspaces); mgrOdd.setHelperWorkspacePointers(helperWorkspaces); @@ -1407,64 +1509,74 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial log.trace("About to forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); } - //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet) - //Hence: put inputs in working memory + // Edge case: for first layer with dropout, inputs can't be in previous workspace (as it + // hasn't been opened yet) + // Hence: put inputs in working memory if (i == 0 && wsm != WorkspaceMode.NONE) { mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG); } - try (MemoryWorkspace wsFFWorking = mgr.notifyScopeEntered( - ArrayType.FF_WORKING_MEM)) { //Working memory: opened/closed once per layer - //Activations workspaces: opened/closed every second layer. - //So mgrEven (WS_LAYER_ACT_1) open at start of 0, 2, 4, 8; closed at end of 1, 3, 5, 7 etc - //and mgrOdd (WS_LAYER_ACT_2) opened at start of 1, 3, 5, 7; closed at end of 2, 4, 6, 8 etc + try (MemoryWorkspace wsFFWorking = + mgr.notifyScopeEntered( + ArrayType.FF_WORKING_MEM)) { // Working memory: opened/closed once per layer + // Activations workspaces: opened/closed every second layer. + // So mgrEven (WS_LAYER_ACT_1) open at start of 0, 2, 4, 8; closed at end of 1, 3, 5, 7 + // etc + // and mgrOdd (WS_LAYER_ACT_2) opened at start of 1, 3, 5, 7; closed at end of 2, 4, 6, 8 + // etc temp = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS); - //Note that because we're opening activation workspaces not in a simple nested order, we'll manually - // override the previous workspace setting. 
Otherwise, when we close these workspaces, the "current" + // Note that because we're opening activation workspaces not in a simple nested order, + // we'll manually + // override the previous workspace setting. Otherwise, when we close these workspaces, the + // "current" // workspace may be set to the incorrect one temp.setPreviousWorkspace(initialWorkspace); if (i == 0 && input.isAttached()) { - //Don't leverage out of async DataSetIterator workspaces + // Don't leverage out of async DataSetIterator workspaces mgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); } if (getNetConfiguration().getInputPreProcess(i) != null) { - input = getNetConfiguration().getInputPreProcess(i) - .preProcess(input, getInputMiniBatchSize(), mgr); - //Validation: Exception if invalid (bad preprocessor implementation) - validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, true, - "Output of layer (inference)"); + input = + getNetConfiguration() + .getInputPreProcess(i) + .preProcess(input, getInputMiniBatchSize(), mgr); + // Validation: Exception if invalid (bad preprocessor implementation) + validateArrayWorkspaces( + mgr, input, ArrayType.ACTIVATIONS, i, true, "Output of layer (inference)"); } if (i == layerIndex) { if (outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)) { - //Place activations in user-specified workspace - mgr.setWorkspace(ArrayType.ACTIVATIONS, outputWorkspace.getId(), + // Place activations in user-specified workspace + mgr.setWorkspace( + ArrayType.ACTIVATIONS, + outputWorkspace.getId(), outputWorkspace.getWorkspaceConfiguration()); } else { - //Final activations: should be detached + // Final activations: should be detached mgr.setScopedOutFor(ArrayType.ACTIVATIONS); } } if (fwdPassType == FwdPassType.STANDARD) { - //Standard feed-forward case - if (i > 0 && ConvolutionUtils.layerHasConvolutionLayout( - layers[i - 1].getLayerConfiguration()) + // Standard feed-forward case + if (i > 0 + && ConvolutionUtils.layerHasConvolutionLayout(layers[i - 1].getLayerConfiguration()) && ConvolutionUtils.layerHasConvolutionLayout(layers[i].getLayerConfiguration())) { - CNN2DFormat preLayerFormat = ConvolutionUtils.getFormatForLayer( - layers[i - 1].getLayerConfiguration()); - CNN2DFormat currLayerFormat = ConvolutionUtils.getFormatForLayer( - layers[i].getLayerConfiguration()); + CNN2DFormat preLayerFormat = + ConvolutionUtils.getFormatForLayer(layers[i - 1].getLayerConfiguration()); + CNN2DFormat currLayerFormat = + ConvolutionUtils.getFormatForLayer(layers[i].getLayerConfiguration()); if (preLayerFormat != currLayerFormat) { - //NHWC case + // NHWC case if (preLayerFormat == CNN2DFormat.NCHW) { input = input.permute(0, 3, 1, 2); } - //NCHW case + // NCHW case else if (preLayerFormat == CNN2DFormat.NHWC) { input = input.permute(0, 2, 3, 1); @@ -1475,26 +1587,25 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } input = layers[i].activate(input, train, mgr); - } else if (i > 0 && Convolution1DUtils.hasRnnDataFormat( - layers[i - 1].getLayerConfiguration()) + } else if (i > 0 + && Convolution1DUtils.hasRnnDataFormat(layers[i - 1].getLayerConfiguration()) && Convolution1DUtils.hasRnnDataFormat(layers[i].getLayerConfiguration())) { - RNNFormat preLayerFormat = Convolution1DUtils.getRnnFormatFromLayer( - layers[i - 1].getLayerConfiguration()); - RNNFormat currLayerFormat = Convolution1DUtils.getRnnFormatFromLayer( - layers[i].getLayerConfiguration()); - //permute for next layer + RNNFormat preLayerFormat = + 
Convolution1DUtils.getRnnFormatFromLayer(layers[i - 1].getLayerConfiguration()); + RNNFormat currLayerFormat = + Convolution1DUtils.getRnnFormatFromLayer(layers[i].getLayerConfiguration()); + // permute for next layer if (preLayerFormat != currLayerFormat) { input = input.permute(0, 2, 1); } input = layers[i].activate(input, train, mgr); - } else { input = layers[i].activate(input, train, mgr); } } else if (fwdPassType == FwdPassType.RNN_TIMESTEP) { - //rnnTimeStep case + // rnnTimeStep case if (layers[i] instanceof RecurrentLayer) { input = ((RecurrentLayer) layers[i]).rnnTimeStep(reshapeTimeStepInput(input), mgr); } else if (layers[i] instanceof BaseWrapperLayer @@ -1511,9 +1622,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial "Unsupported forward pass type for this method: " + fwdPassType); } layers[i].clear(); - //Validation: Exception if invalid (bad layer implementation) - validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, false, - "Output of layer (inference)"); + // Validation: Exception if invalid (bad layer implementation) + validateArrayWorkspaces( + mgr, input, ArrayType.ACTIVATIONS, i, false, "Output of layer (inference)"); if (wsActCloseNext != null) { wsActCloseNext.close(); @@ -1526,11 +1637,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial log.trace("Completed forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); } - //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet) - //Hence: put inputs in working memory -> set back to default for next use of workspace mgr + // Edge case: for first layer with dropout, inputs can't be in previous workspace (as it + // hasn't been opened yet) + // Hence: put inputs in working memory -> set back to default for next use of workspace mgr if (i == 0 && wsm != WorkspaceMode.NONE) { - mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_ACT_2, - WS_LAYER_ACT_X_CONFIG); //Inputs should always be in the previous WS + mgr.setWorkspace( + ArrayType.INPUT, + WS_LAYER_ACT_2, + WS_LAYER_ACT_X_CONFIG); // Inputs should always be in the previous WS } } } catch (Throwable t2) { @@ -1549,9 +1663,10 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } if (temp != null) { - //Should only be non-null on exception + // Should only be non-null on exception while (temp.isScopeActive()) { - //For safety, should never occur in theory: a single close() call may not be sufficient, if + // For safety, should never occur in theory: a single close() call may not be sufficient, + // if // workspace scope was borrowed and not properly closed when exception occurred try { temp.close(); @@ -1579,9 +1694,10 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial WorkspaceUtils.assertNoWorkspacesOpen( "Expected no workspace active at the end of outputOfLayerDetached", true); } else { - Preconditions.checkState(outputWorkspace.isScopeActive(), - "Expected output workspace to still be open" + - "at end of outputOfLayerDetached, but it is closed. This suggests an implementation or layer workspace problem"); + Preconditions.checkState( + outputWorkspace.isScopeActive(), + "Expected output workspace to still be open" + + "at end of outputOfLayerDetached, but it is closed. 
This suggests an implementation or layer workspace problem"); } } @@ -1624,8 +1740,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * Compute the activations from the input to the output layer, given mask arrays (that may be * null) The masking arrays are used in situations such an one-to-many and many-to-one rucerrent * neural network (RNN) designs, as well as for supporting time series of varying lengths within - * the same minibatch for RNNs. Other than mask arrays, this is equivalent to calling - * {@link #feedForward(INDArray, boolean)} with train = false + * the same minibatch for RNNs. Other than mask arrays, this is equivalent to calling {@link + * #feedForward(INDArray, boolean)} with train = false */ public List feedForward(INDArray input, INDArray featuresMask, INDArray labelsMask) { setLayerMaskArrays(featuresMask, labelsMask); @@ -1641,14 +1757,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial @Override public Pair gradientAndScore() { - return new Pair<>(gradient(), score()); + return new Pair<>(gradient(), getScore()); } /** * Clone the MultiLayerNetwork * * @return A cloned MultiLayerNetwork with a copy of the configuration, parameters and updater - * identical to the current network. + * identical to the current network. */ @Override public MultiLayerNetwork clone() { @@ -1657,10 +1773,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } NeuralNetConfiguration conf = this.getNetConfiguration().clone(); MultiLayerNetwork ret = new MultiLayerNetwork(conf); - ret.init(this.params().dup(), false); + ret.init(this.getModelParams().dup(), false); if (solver != null) { - //If solver is null: updater hasn't been initialized -> getUpdater call will force initialization, however + // If solver is null: updater hasn't been initialized -> getUpdater call will force + // initialization, however Updater u = this.getUpdater(); INDArray updaterState = u.getStateViewArray(); if (updaterState != null) { @@ -1669,7 +1786,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } if (hasAFrozenLayer()) { - //correct layers to frozen layers + // correct layers to frozen layers Layer[] clonedLayers = ret.getLayers(); for (int i = 0; i < layers.length; i++) { if (layers[i] instanceof FrozenLayer) { @@ -1691,84 +1808,28 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * @deprecated To be removed. Use {@link #params()} instead + * @deprecated To be removed. Use {@link #getModelParams()} instead */ @Deprecated public INDArray params(boolean backwardOnly) { - return params(); + return getModelParams(); } /** * Returns a 1 x m vector where the vector is composed of a flattened vector of all of the - * parameters in the network.
See {@link #getParam(String)} and {@link #getParamTable()} for a - * more useful/interpretable representation of the parameters.
Note that the parameter vector - * is not a copy, and changes to the returned INDArray will impact the network parameters. + * parameters in the network.
+ * See {@link #getParam(String)} and {@link #getParamTable()} for a more useful/interpretable + * representation of the parameters.
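+ * <p>A short illustrative sketch (the {@code net} variable and the {@code "0_W"} key are
+ * assumptions, not part of this class):
+ * <pre>{@code
+ * INDArray flat = net.getModelParams();  // 1 x numParams() row vector backed by the network
+ * flat.muli(0.5);                        // in-place change: scales every parameter of the network
+ * INDArray w0 = net.getParam("0_W");     // the same parameters, addressed per layer and key
+ * }</pre>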
+ * Note that the parameter vector is not a copy, and changes to the returned INDArray will impact + * the network parameters. * * @return the parameters for this neural net */ @Override - public INDArray params() { + public INDArray getModelParams() { return flattenedParams; } - /** - * The param table - * - * @return - */ - @Override - public Map getParamTable() { - return null; - } - - /** - * Table of parameters by key, for backprop. For many models (dense layers, etc) - all parameters - * are backprop parameters - * - * @param backpropParamsOnly If true, return backprop params only. If false: return all params - * (equivalent to paramsTable()) - */ - @Override - public Map getParamTable(boolean backpropParamsOnly) { - return null; - } - - /** - * Set the parameters for this model. This expects a linear ndarray which then be unpacked - * internally relative to the expected ordering of the model.
See also: - * {@link #setParamTable(Map)} and {@link #setParam(String, INDArray)} - * - * @param params the parameters for the model - */ - @Override - public void setParams(INDArray params) { - if (flattenedParams == params) { - return; //No op - } - - if (flattenedParams != null && params.length() == flattenedParams.length()) { - if (params != flattenedParams) { - flattenedParams.assign(params); - } - } else { - if (flattenedParams == null) { - flattenedParams = params.dup(); - } - int idx = 0; - for (int i = 0; i < getLayers().length; i++) { - Layer layer = getLayer(i); - long range = layer.numParams(); - if (range <= 0) { - continue; //Some layers: no parameters (subsampling, etc) - } - INDArray get = params.get(NDArrayIndex.interval(0, 0, true), - NDArrayIndex.interval(idx, range + idx)); - layer.setParams(get); - idx += range; - } - } - } - @Override public void setParamsViewArray(INDArray params) { throw new UnsupportedOperationException("Not yet implemented"); @@ -1786,14 +1847,16 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (layer.numParams() == 0) { continue; } - layer.setBackpropGradientsViewArray(gradients.get(NDArrayIndex.interval(0, 0, true), - NDArrayIndex.interval(paramsSoFar, paramsSoFar + layer.numParams()))); + layer.setBackpropGradientsViewArray( + gradients.get( + NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(paramsSoFar, paramsSoFar + layer.numParams()))); paramsSoFar += layer.numParams(); } } @Override - public TrainingConfig getConfig() { + public ITraininableLayerConfiguration getTrainingConfig() { throw new UnsupportedOperationException("Not supported"); } @@ -1807,14 +1870,58 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (!isInitCalled()) { init(); } - return flattenedParams == null ? 0 : flattenedParams.length(); //Maybe nul for 0 params net + return flattenedParams == null ? 0 : flattenedParams.length(); // Maybe nul for 0 params net + } + + /** + * @return 1d parameter vector + */ + @Override + public INDArray getParams() { + throw new RuntimeException("Calling getParams on the MultiLazerNetwork !?"); + } + + /** + * Set the parameters for this model. This expects a linear ndarray which then be unpacked + * internally relative to the expected ordering of the model.
+ * See also: {@link #setParamTable(Map)} and {@link #setParam(String, INDArray)} + * + * @param params the parameters for the model + */ + @Override + public void setParams(INDArray params) { + if (flattenedParams == params) { + return; // No op + } + + if (flattenedParams != null && params.length() == flattenedParams.length()) { + if (params != flattenedParams) { + flattenedParams.assign(params); + } + } else { + if (flattenedParams == null) { + flattenedParams = params.dup(); + } + int idx = 0; + for (int i = 0; i < getLayers().length; i++) { + Layer layer = getLayer(i); + long range = layer.numParams(); + if (range <= 0) { + continue; // Some layers: no parameters (subsampling, etc) + } + INDArray get = + params.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(idx, range + idx)); + layer.setParams(get); + idx += range; + } + } } /** * Returns the number of parameters in the network * * @param backwards If true: exclude any parameters uned only in unsupervised layerwise training - * (such as the decoder parameters in an autoencoder) + * (such as the decoder parameters in an autoencoder) * @return The number of parameters */ @Override @@ -1843,15 +1950,16 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * Perform minibatch training on all minibatches in the DataSetIterator, for the specified number * of epochs. Equvalent to calling {@link #fit(DataSetIterator)} numEpochs times in a loop * - * @param iterator Training data (DataSetIterator). Iterator must support resetting + * @param iterator Training data (DataSetIterator). Iterator must support resetting * @param numEpochs Number of training epochs, >= 1 */ public void fit(@NonNull DataSetIterator iterator, int numEpochs) { - Preconditions.checkArgument(numEpochs > 0, "Number of epochs much be > 0. Got numEpochs = %s", - numEpochs); - Preconditions.checkArgument(numEpochs == 1 || iterator.resetSupported(), - "Cannot perform multiple epochs training using" + - "iterator thas does not support resetting (iterator.resetSupported() returned false)"); + Preconditions.checkArgument( + numEpochs > 0, "Number of epochs much be > 0. Got numEpochs = %s", numEpochs); + Preconditions.checkArgument( + numEpochs == 1 || iterator.resetSupported(), + "Cannot perform multiple epochs training using" + + "iterator thas does not support resetting (iterator.resetSupported() returned false)"); for (int i = 0; i < numEpochs; i++) { fit(iterator); @@ -1859,9 +1967,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Perform minibatch training on all minibatches in the DataSetIterator for 1 epoch.
Note that - * this method does not do layerwise pretraining.
For pretraining use method pretrain.. - * {@link #pretrain(DataSetIterator)}
+ * Perform minibatch training on all minibatches in the DataSetIterator for 1 epoch.
+ * Note that this method does not do layerwise pretraining.
+ * For pretraining, use {@link #pretrain(DataSetIterator)}.
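+ * <p>A minimal usage sketch (the MNIST iterator and the {@code net} variable are assumptions, not
+ * part of this class):
+ * <pre>{@code
+ * DataSetIterator trainData = new MnistDataSetIterator(64, true, 12345);
+ * net.fit(trainData);      // a single epoch over the iterator
+ * net.fit(trainData, 5);   // five epochs; requires trainData.resetSupported() == true
+ * }</pre>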
* * @param iterator Training data (DataSetIterator) */ @@ -1876,12 +1984,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } private synchronized void fitHelper(DataSetIterator iterator) { - // we're wrapping all iterators into AsyncDataSetIterator to provide background prefetch - where appropriate + // we're wrapping all iterators into AsyncDataSetIterator to provide background prefetch - where + // appropriate DataSetIterator iter; boolean destructable = false; if (iterator.asyncSupported()) { - iter = new AsyncDataSetIterator(iterator, - Math.min(Nd4j.getAffinityManager().getNumberOfDevices() * 2, 2), true); + iter = + new AsyncDataSetIterator( + iterator, Math.min(Nd4j.getAffinityManager().getNumberOfDevices() * 2, 2), true); destructable = true; } else { iter = iterator; @@ -1895,20 +2005,26 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { - workspaceMgr = LayerWorkspaceMgr.builder() - .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - //Note for updater working memory, we have the option to re-use WS_ALL_LAYERS_ACT or FF/BP_WORKING_MEM - // as these should be closed by the time updaters are executed - //Generally, WS_ALL_LAYERS_ACT will be the larger of the two, so we'll use this - .with(ArrayType.UPDATER_WORKING_MEM, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .build(); + workspaceMgr = + LayerWorkspaceMgr.builder() + .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_FF_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_BP_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + // Note for updater working memory, we have the option to re-use WS_ALL_LAYERS_ACT or + // FF/BP_WORKING_MEM + // as these should be closed by the time updaters are executed + // Generally, WS_ALL_LAYERS_ACT will be the larger of the two, so we'll use this + .with(ArrayType.UPDATER_WORKING_MEM, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .build(); } workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); @@ -1933,8 +2049,12 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial boolean hasMaskArrays = next.hasMaskArrays(); if (getNetConfiguration().getBackpropType() == BackpropType.TruncatedBPTT) { - doTruncatedBPTT(next.getFeatures(), next.getLabels(), next.getFeaturesMaskArray(), - next.getLabelsMaskArray(), workspaceMgr); + doTruncatedBPTT( + next.getFeatures(), + next.getLabels(), + next.getFeaturesMaskArray(), + next.getLabelsMaskArray(), + workspaceMgr); } else { if (hasMaskArrays) { setLayerMaskArrays(next.getFeaturesMaskArray(), 
next.getLabelsMaskArray()); @@ -1945,12 +2065,16 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) - .build(); + solver = + new Solver.Builder() + .configure(getNetConfiguration()) + .listeners(this.getTrainingListeners()) + .model(this) + .build(); } } - //TODO CACHE + // TODO CACHE solver.optimize(workspaceMgr); } @@ -1981,16 +2105,15 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * Calculate parameter gradients and input activation gradients given the input and labels, and * optionally mask arrays * - * @param features Features for gradient calculation - * @param label Labels for gradient - * @param fMask Features mask array (may be null) + * @param features Features for gradient calculation + * @param label Labels for gradient + * @param fMask Features mask array (may be null) * @param labelMask Label mask array (may be null) * @return A pair of gradient arrays: parameter gradients (in Gradient object) and input - * activation gradients + * activation gradients */ - public Pair calculateGradients(@NonNull INDArray features, - @NonNull INDArray label, - INDArray fMask, INDArray labelMask) { + public Pair calculateGradients( + @NonNull INDArray features, @NonNull INDArray label, INDArray fMask, INDArray labelMask) { try { return calculateGradientsHelper(features, label, fMask, labelMask); } catch (OutOfMemoryError e) { @@ -1999,9 +2122,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } - private Pair calculateGradientsHelper(INDArray features, INDArray label, - INDArray fMask, - INDArray labelMask) { + private Pair calculateGradientsHelper( + INDArray features, INDArray label, INDArray fMask, INDArray labelMask) { setInput(features); setLabels(label); setLayerMaskArrays(fMask, labelMask); @@ -2010,42 +2132,51 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { mgr = LayerWorkspaceMgr.noWorkspaces(); } else { - mgr = LayerWorkspaceMgr.builder() - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + mgr = + LayerWorkspaceMgr.builder() + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_FF_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_BP_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); if (getNetConfiguration().getCacheMode() != null) { - //For now: store cache mode activations in activations workspace + // For now: store 
cache mode activations in activations workspace mgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); } } mgr.setHelperWorkspacePointers(helperWorkspaces); - //Calculate activations (which are stored in each layer, and used in backprop) + // Calculate activations (which are stored in each layer, and used in backprop) try (MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS)) { - //First: do a feed-forward through the network - //Note that we don't actually need to do the full forward pass through the output layer right now; but we do + // First: do a feed-forward through the network + // Note that we don't actually need to do the full forward pass through the output layer right + // now; but we do // need the input to the output layer to be set (such that backprop can be done) - List activations = ffToLayerActivationsInWs(layers.length - 2, FwdPassType.STANDARD, - false, input, mask, fMask); + List activations = + ffToLayerActivationsInWs( + layers.length - 2, FwdPassType.STANDARD, false, input, mask, fMask); if (!trainingListeners.isEmpty()) { - //TODO: We possibly do want output layer activations in some cases here... + // TODO: We possibly do want output layer activations in some cases here... for (TrainingListener tl : trainingListeners) { tl.onForwardPass(this, activations); } } INDArray inputToOutputLayer = activations.get(activations.size() - 1); if (getNetConfiguration().getInputPreProcess(layers.length - 1) != null) { - inputToOutputLayer = getNetConfiguration().getInputPreProcess(layers.length - 1) - .preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr); - //Validate activations location + inputToOutputLayer = + getNetConfiguration() + .getInputPreProcess(layers.length - 1) + .preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr); + // Validate activations location } getOutputLayer().setInput(inputToOutputLayer, mgr); @@ -2062,18 +2193,16 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * network learning) (b) backpropGradient (layer method, for when MultiLayerNetwork is used as a * layer) * - * @param epsilon Errors (technically errors .* activations). Not used if - * withOutputLayer = true - * @param withOutputLayer if true: assume last layer is output layer, and calculate errors - * based on labels. In this case, the epsilon input is not used - * (may/should be null). If false: calculate backprop gradients + * @param epsilon Errors (technically errors .* activations). Not used if withOutputLayer = true + * @param withOutputLayer if true: assume last layer is output layer, and calculate errors based + * on labels. In this case, the epsilon input is not used (may/should be null). If false: + * calculate backprop gradients * @param returnInputActGrad If true: terun the input activation gradients (detached). 
False: - * don't return + * don't return * @return Gradients and the error (epsilon) at the input */ - protected Pair calcBackpropGradients(INDArray epsilon, - boolean withOutputLayer, boolean tbptt, - boolean returnInputActGrad) { + protected Pair calcBackpropGradients( + INDArray epsilon, boolean withOutputLayer, boolean tbptt, boolean returnInputActGrad) { if (flattenedGradients == null) { initGradientsView(); } @@ -2087,63 +2216,82 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial mgrEven = LayerWorkspaceMgr.noWorkspaces(); mgrOdd = mgrEven; WorkspaceUtils.assertNoWorkspacesOpen( - "Expected no workspace active in calcBackpropGradients when " + - "training workspace is set to none"); + "Expected no workspace active in calcBackpropGradients when " + + "training workspace is set to none"); } else { - /* - Workspaces for backprop in MLN share some features with outputOfLayerDetached, in terms of the - "two alternating workspaces" idea (but for activation gradients here, instead of activations there). + /* + Workspaces for backprop in MLN share some features with outputOfLayerDetached, in terms of the + "two alternating workspaces" idea (but for activation gradients here, instead of activations there). - Workspace design for backprop: - First: we calculate all activations, and ensure they are in WS_ALL_LAYERS_ACT. We assume this is done - EXTERNALLY to this method - Then: we iterate backwards over layers. + Workspace design for backprop: + First: we calculate all activations, and ensure they are in WS_ALL_LAYERS_ACT. We assume this is done + EXTERNALLY to this method + Then: we iterate backwards over layers. - Activations gradient workspaces: opened/closed every second layer. - mgrEven (WS_LAYER_ACT_1) activation grad WS opens at start of 8, 4, 2, 0; closed at end of 7, 5, 3, 1 etc - mgrOdd (WS_LAYER_ACT_2) activation grad WS opens at start of 7, 3, 5, 1; closed at end of 6, 4, 2, 0 etc + Activations gradient workspaces: opened/closed every second layer. + mgrEven (WS_LAYER_ACT_1) activation grad WS opens at start of 8, 4, 2, 0; closed at end of 7, 5, 3, 1 etc + mgrOdd (WS_LAYER_ACT_2) activation grad WS opens at start of 7, 3, 5, 1; closed at end of 6, 4, 2, 0 etc - */ + */ - mgrEven = LayerWorkspaceMgr.builder() - //Activations in context of backprop (preOut methods etc) are not used outside of the layer itself - .with(ArrayType.ACTIVATIONS, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, - WS_ALL_LAYERS_ACT_CONFIG) //Usually not required here. Exception: OutputLayer dropout - .with(ArrayType.ACTIVATION_GRAD, WS_LAYER_ACT_1, WS_LAYER_ACT_X_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + mgrEven = + LayerWorkspaceMgr.builder() + // Activations in context of backprop (preOut methods etc) are not used outside of the + // layer itself + .with(ArrayType.ACTIVATIONS, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with( + ArrayType.INPUT, + WS_ALL_LAYERS_ACT, + WS_ALL_LAYERS_ACT_CONFIG) // Usually not required here. 
Exception: OutputLayer + // dropout + .with(ArrayType.ACTIVATION_GRAD, WS_LAYER_ACT_1, WS_LAYER_ACT_X_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_FF_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_BP_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); - mgrOdd = LayerWorkspaceMgr.builder() - //Activations in context of backprop (preOut methods etc) are not used outside of the layer itself - .with(ArrayType.ACTIVATIONS, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, - WS_ALL_LAYERS_ACT_CONFIG) //Usually not required here. Exception: OutputLayer dropout - .with(ArrayType.ACTIVATION_GRAD, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + mgrOdd = + LayerWorkspaceMgr.builder() + // Activations in context of backprop (preOut methods etc) are not used outside of the + // layer itself + .with(ArrayType.ACTIVATIONS, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with( + ArrayType.INPUT, + WS_ALL_LAYERS_ACT, + WS_ALL_LAYERS_ACT_CONFIG) // Usually not required here. Exception: OutputLayer + // dropout + .with(ArrayType.ACTIVATION_GRAD, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_FF_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_BP_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); if (epsilon == null) { - //If epsilon is non-null: external errors use case -> inputs are already detached - WorkspaceUtils.assertOpenActiveAndCurrent(WS_ALL_LAYERS_ACT, - "calcBackpropGradients method requires workspace WS_ALL_LAYERS_ACT" + - " to be open when workspaces are used"); + // If epsilon is non-null: external errors use case -> inputs are already detached + WorkspaceUtils.assertOpenActiveAndCurrent( + WS_ALL_LAYERS_ACT, + "calcBackpropGradients method requires workspace WS_ALL_LAYERS_ACT" + + " to be open when workspaces are used"); } } mgrEven.setHelperWorkspacePointers(helperWorkspaces); mgrOdd.setHelperWorkspacePointers(helperWorkspaces); - //calculate and apply the backward gradient for every layer + // calculate and apply the backward gradient for every layer /* * Skip the output layer for the indexing and just loop backwards updating the coefficients for each layer. * (when withOutputLayer == true) @@ -2154,7 +2302,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * This interpretation transpose a few things to get mini batch because ND4J is rows vs columns organization for params */ int numLayers = getnLayers(); - //Store gradients is a list; used to ensure iteration order in DefaultGradient linked hash map. 
i.e., layer 0 first instead of output layer + // Store gradients is a list; used to ensure iteration order in DefaultGradient linked hash map. + // i.e., layer 0 first instead of output layer LinkedList> gradientList = new LinkedList<>(); Pair currPair = null; @@ -2191,55 +2340,78 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial outputLayer.setLabels(labels); } - //Open activation gradients WS *then* BP working memory, so BP working memory is opened last for use in layers + // Open activation gradients WS *then* BP working memory, so BP working memory is opened + // last for use in layers wsActGradTemp = workspaceMgr.notifyScopeEntered(ArrayType.ACTIVATION_GRAD); - try (MemoryWorkspace wsBPWorking = workspaceMgr.notifyScopeEntered( - ArrayType.BP_WORKING_MEM)) { + try (MemoryWorkspace wsBPWorking = + workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)) { - //Note that because we're opening activation workspaces not in a simple nested order, we'll manually - // override the previous workspace setting. Otherwise, when we close these workspaces, the "current" + // Note that because we're opening activation workspaces not in a simple nested order, + // we'll manually + // override the previous workspace setting. Otherwise, when we close these workspaces, the + // "current" // workspace may be set to the incorrect one wsActGradTemp.setPreviousWorkspace(initialWorkspace); wsBPWorking.setPreviousWorkspace(initialWorkspace); - INDArray eps = (i == layers.length - 1 ? epsilon - : currPair.getRight()); //eps is null for OutputLayer + INDArray eps = + (i == layers.length - 1 + ? epsilon + : currPair.getRight()); // eps is null for OutputLayer if (!tbptt) { - //Standard case + // Standard case currPair = layers[i].backpropGradient(eps, workspaceMgr); } else { - //TBPTT gradient + // TBPTT gradient if (layers[i] instanceof RecurrentLayer) { - currPair = ((RecurrentLayer) layers[i]).tbpttBackpropGradient(currPair.getSecond(), - getNetConfiguration().getTbpttBackLength(), workspaceMgr); + currPair = + ((RecurrentLayer) layers[i]) + .tbpttBackpropGradient( + currPair.getSecond(), + getNetConfiguration().getTbpttBackLength(), + workspaceMgr); } else { currPair = layers[i].backpropGradient(currPair.getSecond(), workspaceMgr); } } if (currPair.getSecond() != null) { - //Edge case: may be null for Embedding layer, for example - validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, + // Edge case: may be null for Embedding layer, for example + validateArrayWorkspaces( + workspaceMgr, + currPair.getSecond(), + ArrayType.ACTIVATION_GRAD, i, - false, "Backprop"); + false, + "Backprop"); } - for (Map.Entry entry : currPair.getFirst().gradientForVariable() - .entrySet()) { + for (Map.Entry entry : + currPair.getFirst().gradientForVariable().entrySet()) { String origName = entry.getKey(); multiGradientKey = i + "_" + origName; - gradientList.addLast(new Triple<>(multiGradientKey, entry.getValue(), - currPair.getFirst().flatteningOrderForVariable(origName))); + gradientList.addLast( + new Triple<>( + multiGradientKey, + entry.getValue(), + currPair.getFirst().flatteningOrderForVariable(origName))); } if (getNetConfiguration().getInputPreProcess(i) != null) { - currPair = new Pair<>(currPair.getFirst(), - this.getNetConfiguration().getInputPreProcess(i) - .backprop(currPair.getSecond(), getInputMiniBatchSize(), workspaceMgr)); + currPair = + new Pair<>( + currPair.getFirst(), + this.getNetConfiguration() + .getInputPreProcess(i) + 
.backprop(currPair.getSecond(), getInputMiniBatchSize(), workspaceMgr)); if (i > 0 && currPair.getSecond() != null) { - validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, + validateArrayWorkspaces( + workspaceMgr, + currPair.getSecond(), + ArrayType.ACTIVATION_GRAD, i, - true, "Backprop"); + true, + "Backprop"); } } @@ -2278,7 +2450,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } if (wsActGradTemp != null) { - //Should only be non-null on exception + // Should only be non-null on exception try { wsActGradTemp.close(); } catch (Throwable t2) { @@ -2302,18 +2474,19 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { WorkspaceUtils.assertNoWorkspacesOpen( - "Expected no workspace active in calcBackpropGradients when " + - "training workspace is set to none"); + "Expected no workspace active in calcBackpropGradients when " + + "training workspace is set to none"); } else { if (epsilon == null) { - //If epsilon != null: external errors use case (inputs are detached instead) - WorkspaceUtils.assertOpenActiveAndCurrent(WS_ALL_LAYERS_ACT, - "calcBackpropGradients: WS_ALL_LAYERS_ACT is no" + - " longer the currently open/active workspace"); + // If epsilon != null: external errors use case (inputs are detached instead) + WorkspaceUtils.assertOpenActiveAndCurrent( + WS_ALL_LAYERS_ACT, + "calcBackpropGradients: WS_ALL_LAYERS_ACT is no" + + " longer the currently open/active workspace"); } } - //Add gradients to Gradients (map), in correct order + // Add gradients to Gradients (map), in correct order for (Triple triple : gradientList) { gradient.setGradientFor(triple.getFirst(), triple.getSecond(), triple.getThird()); } @@ -2321,19 +2494,25 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return new Pair<>(gradient, currPair.getSecond()); } - protected void doTruncatedBPTT(INDArray input, INDArray labels, INDArray featuresMaskArray, - INDArray labelsMaskArray, LayerWorkspaceMgr workspaceMgr) { + protected void doTruncatedBPTT( + INDArray input, + INDArray labels, + INDArray featuresMaskArray, + INDArray labelsMaskArray, + LayerWorkspaceMgr workspaceMgr) { if (input.rank() != 3 || labels.rank() != 3) { log.warn( "Cannot do truncated BPTT with non-3d inputs or labels. 
Expect input with shape [miniBatchSize,nIn,timeSeriesLength], got " - + Arrays.toString(input.shape()) + "\tand labels with shape " + + Arrays.toString(input.shape()) + + "\tand labels with shape " + Arrays.toString(labels.shape())); return; } if (input.size(2) != labels.size(2)) { log.warn( "Input and label time series have different lengths: {} input length, {} label length", - input.size(2), labels.size(2)); + input.size(2), + labels.size(2)); return; } @@ -2342,7 +2521,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial val timeSeriesLength = input.size(2); long nSubsets = timeSeriesLength / fwdLen; if (timeSeriesLength % fwdLen != 0) { - nSubsets++; //Example: 100 fwdLen with timeSeriesLength=120 -> want 2 subsets (1 of size 100, 1 of size 20) + nSubsets++; // Example: 100 fwdLen with timeSeriesLength=120 -> want 2 subsets (1 of size 100, + // 1 of size 20) } rnnClearPreviousState(); @@ -2357,8 +2537,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (startTimeIdx > Integer.MAX_VALUE || endTimeIdx > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); } - INDArray[] subsets = getSubsetsForTbptt((int) startTimeIdx, (int) endTimeIdx, input, labels, - featuresMaskArray, labelsMaskArray); + INDArray[] subsets = + getSubsetsForTbptt( + (int) startTimeIdx, + (int) endTimeIdx, + input, + labels, + featuresMaskArray, + labelsMaskArray); setInput(subsets[0]); setLabels(subsets[1]); @@ -2366,13 +2552,17 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) - .build(); + solver = + new Solver.Builder() + .configure(getNetConfiguration()) + .listeners(this.getTrainingListeners()) + .model(this) + .build(); } } solver.optimize(workspaceMgr); - //Finally, update the state of the RNN layers: + // Finally, update the state of the RNN layers: updateRnnStateWithTBPTTState(); } @@ -2380,30 +2570,36 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial clearLayerMaskArrays(); } - private INDArray[] getSubsetsForTbptt(int startTimeIdx, int endTimeIdx, INDArray input, + private INDArray[] getSubsetsForTbptt( + int startTimeIdx, + int endTimeIdx, + INDArray input, INDArray labels, - INDArray fMask, INDArray lMask) { + INDArray fMask, + INDArray lMask) { INDArray[] out = new INDArray[4]; - out[0] = input.get(NDArrayIndex.all(), NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); - out[1] = labels.get(NDArrayIndex.all(), NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); + out[0] = + input.get( + NDArrayIndex.all(), + NDArrayIndex.all(), + NDArrayIndex.interval(startTimeIdx, endTimeIdx)); + out[1] = + labels.get( + NDArrayIndex.all(), + NDArrayIndex.all(), + NDArrayIndex.interval(startTimeIdx, endTimeIdx)); if (fMask != null) { - out[2] = fMask.get(NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); + out[2] = fMask.get(NDArrayIndex.all(), NDArrayIndex.interval(startTimeIdx, endTimeIdx)); } if (lMask != null) { - out[3] = lMask.get(NDArrayIndex.all(), - NDArrayIndex.interval(startTimeIdx, endTimeIdx)); + out[3] = lMask.get(NDArrayIndex.all(), NDArrayIndex.interval(startTimeIdx, endTimeIdx)); } return out; } - /** - * Intended for internal/developer use - */ + /** Intended for internal/developer use */ public void 
updateRnnStateWithTBPTTState() { for (int i = 0; i < layers.length; i++) { if (layers[i] instanceof RecurrentLayer) { @@ -2420,43 +2616,16 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * * @return listeners set for this network */ - public Collection getListeners() { + public Collection getTrainingListeners() { return trainingListeners; } - @Override - public void setListeners(TrainingListener ... listeners) { - if (layers == null) { - init(); - } - for (Layer layer : layers) { - layer.setListeners(listeners); - } - - if (solver != null) { - solver.setListeners(List.of(listeners)); - } - - this.trainingListeners.clear(); - if (listeners != null) { - this.trainingListeners.addAll(List.of(listeners)); - } - } - /** * @param listeners */ @Override - public void setListeners(Collection listeners) { - setListeners(listeners.toArray(new TrainingListener[]{})); - } - - /** - * @deprecated Use {@link #getListeners()} - */ - @Deprecated - public Collection getTrainingListeners() { - return trainingListeners; + public void addTrainingListeners(Collection listeners) { + this.addTrainingListeners(listeners.toArray(new TrainingListener[] {})); } /** @@ -2465,7 +2634,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * @param listeners */ @Override - public void addListeners(TrainingListener... listeners) { + public void addTrainingListeners(TrainingListener... listeners) { Collections.addAll(trainingListeners, listeners); // fixme this is wrong, since it removes existing listeners from the solver @@ -2476,8 +2645,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Usable only for classification networks in conjunction with OutputLayer. Cannot be used with - * RnnOutputLayer, CnnLossLayer, or networks used for regression.
To get the raw output - * activations of the output layer, use {@link #output(INDArray)} or similar.
+ * RnnOutputLayer, CnnLossLayer, or networks used for regression.
+ * To get the raw output activations of the output layer, use {@link #output(INDArray)} or + * similar.
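+ * <p>Sketch of the relationship between the two calls (the {@code net} and {@code features}
+ * variables are assumptions):
+ * <pre>{@code
+ * INDArray activations = net.output(features, false);  // raw output activations, one row per example
+ * int[] predicted = net.predict(features);             // index of the largest activation per row
+ * }</pre>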
*
* Equivalent to argmax(this.output(input)): Returns the predicted class indices corresponding to * the predictions for each example in the features array. @@ -2493,7 +2663,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial throw new ND4JArraySizeException(); } - Preconditions.checkState(output.rank() == 2, + Preconditions.checkState( + output.rank() == 2, "predict(INDArray) method can only be used on rank 2 output - got array with rank %s", output.rank()); return output.argMax(1).toIntVector(); @@ -2505,7 +2676,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial */ @Override public List predict(org.nd4j.linalg.dataset.api.DataSet dataSet) { - Preconditions.checkState(dataSet.getLabelNamesList() != null, + Preconditions.checkState( + dataSet.getLabelNamesList() != null, "This method can only be used when the DataSet contains a label name list"); int[] intRet = predict(dataSet.getFeatures()); List ret = new ArrayList<>(); @@ -2518,26 +2690,28 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Fit the model for one iteration on the provided data * - * @param data the examples to classify (one example in each row) + * @param data the examples to classify (one example in each row) * @param labels the example labels(a binary outcome matrix) */ @Override public void fit(INDArray data, INDArray labels) { + if (!initCalled) init(); fit(data, labels, null, null); } /** * Fit the model for one iteration on the provided data * - * @param features the examples to classify (one example in each row) - * @param labels the example labels(a binary outcome matrix) + * @param features the examples to classify (one example in each row) + * @param labels the example labels(a binary outcome matrix) * @param featuresMask The mask array for the features (used for variable length time series, - * etc). May be null. - * @param labelsMask The mask array for the labels (used for variable length time series, etc). - * May be null. + * etc). May be null. + * @param labelsMask The mask array for the labels (used for variable length time series, etc). + * May be null. 
*/ - public synchronized void fit(INDArray features, INDArray labels, INDArray featuresMask, - INDArray labelsMask) { + public synchronized void fit( + INDArray features, INDArray labels, INDArray featuresMask, INDArray labelsMask) { + if (!initCalled) init(); try { fitHelper(features, labels, featuresMask, labelsMask); } catch (OutOfMemoryError e) { @@ -2546,10 +2720,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } - private void fitHelper(INDArray features, INDArray labels, INDArray featuresMask, - INDArray labelsMask) { + private void fitHelper( + INDArray features, INDArray labels, INDArray featuresMask, INDArray labelsMask) { + if (!initCalled) init(); if (numParams() == 0) { - //No op: can't fit a network with 0 parameters + // No op: can't fit a network with 0 parameters return; } @@ -2562,14 +2737,16 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (getNetConfiguration().getTrainingWorkspaceMode() == null) { workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { - workspaceMgr = LayerWorkspaceMgr.builder() - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - //Note for updater working memory, we have the option to re-use WS_ALL_LAYERS_ACT or FF/BP_WORKING_MEM - // these should be closed by the time updaters are executed - //Generally, WS_ALL_LAYERS_ACT will be the larger of the two, so we'll use this - .with(ArrayType.UPDATER_WORKING_MEM, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .build(); + workspaceMgr = + LayerWorkspaceMgr.builder() + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + // Note for updater working memory, we have the option to re-use WS_ALL_LAYERS_ACT or + // FF/BP_WORKING_MEM + // these should be closed by the time updaters are executed + // Generally, WS_ALL_LAYERS_ACT will be the larger of the two, so we'll use this + .with(ArrayType.UPDATER_WORKING_MEM, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .build(); } workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); @@ -2578,11 +2755,15 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } else { if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) - .build(); + solver = + new Solver.Builder() + .configure(getNetConfiguration()) + .listeners(this.getTrainingListeners()) + .model(this) + .build(); } } - //TODO CACHE WORKSPACE, IF USED??? + // TODO CACHE WORKSPACE, IF USED??? 
solver.optimize(workspaceMgr); } @@ -2603,7 +2784,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial */ @Override public void fit(org.nd4j.linalg.dataset.api.DataSet data) { - fit(data.getFeatures(), data.getLabels(), data.getFeaturesMaskArray(), + if (!initCalled) init(); + fit( + data.getFeatures(), + data.getLabels(), + data.getFeaturesMaskArray(), data.getLabelsMaskArray()); } @@ -2611,10 +2796,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * Fit the model for one iteration on the provided data * * @param examples the examples to classify (one example in each row) - * @param labels the labels for each example (the number of labels must match + * @param labels the labels for each example (the number of labels must match */ @Override public void fit(INDArray examples, int[] labels) { + if (!initCalled) init(); org.deeplearning4j.nn.conf.layers.OutputLayer layerConf = (org.deeplearning4j.nn.conf.layers.OutputLayer) getOutputLayer().getLayerConfiguration(); @@ -2630,8 +2816,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * * @param input Input to the network * @param train whether the output is test or train. This mainly affect hyper parameters such as - * dropout and batch normalization, which have different behaviour for test vs. - * train + * dropout and batch normalization, which have different behaviour for test vs. train * @return The network predictions - i.e., the activations of the final layer */ public INDArray output(INDArray input, TrainingMode train) { @@ -2644,8 +2829,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * * @param input Input to the network * @param train whether the output is test or train. This mainly affect hyper parameters such as - * dropout and batch normalization, which have different behaviour for test vs. - * train + * dropout and batch normalization, which have different behaviour for test vs. train * @return The network predictions - i.e., the activations of the final layer */ public INDArray output(INDArray input, boolean train) { @@ -2657,54 +2841,64 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * situations such as one-to-many and many-to-one recurrent neural network (RNN) designs, as well * as for supporting time series of varying lengths within the same minibatch. */ - public INDArray output(INDArray input, boolean train, INDArray featuresMask, - INDArray labelsMask) { + public INDArray output( + INDArray input, boolean train, INDArray featuresMask, INDArray labelsMask) { return output(input, train, featuresMask, labelsMask, null); } /** - * Get the network output, which is optionally placed in the specified memory workspace.
If no - * memory workspace is provided, the output will be detached (not in any workspace).
If a - * memory workspace is provided, the output activation array (i.e., the INDArray returned by this - * method) will be placed in the specified workspace. This workspace must be opened by the user - * before calling this method - and the user is responsible for (a) closing this workspace, and - * (b) ensuring the output array is not used out of scope (i.e., not used after closing the + * Get the network output, which is optionally placed in the specified memory workspace.
+ * If no memory workspace is provided, the output will be detached (not in any workspace).
+ * If a memory workspace is provided, the output activation array (i.e., the INDArray returned by + * this method) will be placed in the specified workspace. This workspace must be opened by the + * user before calling this method - and the user is responsible for (a) closing this workspace, + * and (b) ensuring the output array is not used out of scope (i.e., not used after closing the * workspace to which it belongs - as this is likely to cause either an exception when used, or a * crash). * - * @param input Input to the network - * @param train True for train, false otherwise + * @param input Input to the network + * @param train True for train, false otherwise * @param outputWorkspace May be null. If not null: the workspace MUST be opened before calling - * this method. + * this method. * @return The output/activations from the network (either detached or in the specified workspace - * if provided) + * if provided) */ public INDArray output(INDArray input, boolean train, MemoryWorkspace outputWorkspace) { return output(input, train, null, null, outputWorkspace); } /** - * Get the network output, which is optionally placed in the specified memory workspace.
If no - * memory workspace is provided, the output will be detached (not in any workspace).
If a - * memory workspace is provided, the output activation array (i.e., the INDArray returned by this - * method) will be placed in the specified workspace. This workspace must be opened by the user - * before calling this method - and the user is responsible for (a) closing this workspace, and - * (b) ensuring the output array is not used out of scope (i.e., not used after closing the + * Get the network output, which is optionally placed in the specified memory workspace.
+ * If no memory workspace is provided, the output will be detached (not in any workspace).
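+ * <p>An illustrative calling pattern (the workspace id and configuration shown are assumptions):
+ * <pre>{@code
+ * INDArray detached = net.output(features, false, null, null, null);  // no workspace: detached result
+ *
+ * try (MemoryWorkspace ws = Nd4j.getWorkspaceManager()
+ *     .getAndActivateWorkspace(wsConf, "MY_OUTPUT_WS")) {
+ *   INDArray scoped = net.output(features, false, null, null, ws);
+ *   // use 'scoped' only while 'ws' is still open
+ * }
+ * }</pre>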
+ * If a memory workspace is provided, the output activation array (i.e., the INDArray returned by + * this method) will be placed in the specified workspace. This workspace must be opened by the + * user before calling this method - and the user is responsible for (a) closing this workspace, + * and (b) ensuring the output array is not used out of scope (i.e., not used after closing the * workspace to which it belongs - as this is likely to cause either an exception when used, or a * crash). * - * @param input Input to the network - * @param train True for train, false otherwise + * @param input Input to the network + * @param train True for train, false otherwise * @param outputWorkspace May be null. If not null: the workspace MUST be opened before calling - * this method. + * this method. * @return The output/activations from the network (either detached or in the specified workspace - * if provided) + * if provided) */ - public synchronized INDArray output(INDArray input, boolean train, INDArray featuresMask, - INDArray labelsMask, MemoryWorkspace outputWorkspace) { + public synchronized INDArray output( + INDArray input, + boolean train, + INDArray featuresMask, + INDArray labelsMask, + MemoryWorkspace outputWorkspace) { try { - return outputOfLayerDetached(train, FwdPassType.STANDARD, layers.length - 1, input, - featuresMask, labelsMask, outputWorkspace); + return outputOfLayerDetached( + train, + FwdPassType.STANDARD, + layers.length - 1, + input, + featuresMask, + labelsMask, + outputWorkspace); } catch (OutOfMemoryError e) { CrashReportingUtil.writeMemoryCrashDump(this, e); throw e; @@ -2713,24 +2907,32 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * This method uses provided OutputAdapter to return custom object built from INDArray - *

- * PLEASE NOTE: This method uses dedicated Workspace for output generation to avoid redundant + * + *

PLEASE NOTE: This method uses dedicated Workspace for output generation to avoid redundant * allocations * - * @param inputs Input arrays to the netwonk - * @param inputMasks Optional input mask arrays (may be null) - * @param labelMasks Optional label mask arrays (may be null + * @param inputs Input arrays to the netwonk + * @param inputMasks Optional input mask arrays (may be null) + * @param labelMasks Optional label mask arrays (may be null * @param outputAdapter OutputAdapter instance - * @param T extends Object + * @param T extends Object * @return T instance produced by OutputAdapter */ - public synchronized T output(@NonNull INDArray inputs, INDArray inputMasks, - INDArray labelMasks, @NonNull OutputAdapter outputAdapter) { - try (val ws = Nd4j.getWorkspaceManager() - .getAndActivateWorkspace(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM)) { + public synchronized T output( + @NonNull INDArray inputs, + INDArray inputMasks, + INDArray labelMasks, + @NonNull OutputAdapter outputAdapter) { + try (val ws = + Nd4j.getWorkspaceManager() + .getAndActivateWorkspace(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM)) { if (outputAdapter instanceof ModelAdapter) { - return ((ModelAdapter) outputAdapter).apply(this, new INDArray[]{inputs}, - new INDArray[]{inputMasks}, new INDArray[]{labelMasks}); + return ((ModelAdapter) outputAdapter) + .apply( + this, + new INDArray[] {inputs}, + new INDArray[] {inputMasks}, + new INDArray[] {labelMasks}); } else { return outputAdapter.apply(output(inputs, false, inputMasks, labelMasks, ws)); } @@ -2739,8 +2941,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Perform inference on the provided input/features - i.e., perform forward pass using the - * provided input/features and return the output of the final layer. Equivalent to - * {@link #output(INDArray, boolean)} with train=false - i.e., this method is used for inference. + * provided input/features and return the output of the final layer. Equivalent to {@link + * #output(INDArray, boolean)} with train=false - i.e., this method is used for inference. * * @param input Input to the network * @return The network predictions - i.e., the activations of the final layer @@ -2751,13 +2953,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Generate the output for all examples/batches in the input iterator, and concatenate them into a - * single array. See {@link #output(INDArray)}
NOTE 1: The output array can require a - * considerable amount of memory for iterators with a large number of examples
NOTE 2: This - * method cannot be used for variable length time series outputs, as this would require padding - * arrays for some outputs, or returning a mask array (which cannot be done with this method). For - * variable length time series applications, use one of the other output methods. This method also - * cannot be used with fully convolutional networks with different output sizes (for example, - * segmentation on different input image sizes). + * single array. See {@link #output(INDArray)}
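+ * <p>Sketch (the iterator shown is an assumption):
+ * <pre>{@code
+ * DataSetIterator testData = new MnistDataSetIterator(64, false, 12345);
+ * INDArray allOutputs = net.output(testData);  // outputs for every example, concatenated along dimension 0
+ * }</pre>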
+ * NOTE 1: The output array can require a considerable amount of memory for iterators with a large + * number of examples
+ * NOTE 2: This method cannot be used for variable length time series outputs, as this would + * require padding arrays for some outputs, or returning a mask array (which cannot be done with + * this method). For variable length time series applications, use one of the other output + * methods. This method also cannot be used with fully convolutional networks with different + * output sizes (for example, segmentation on different input image sizes). * * @param iterator Data to pass through the network * @return output for all examples in the iterator, concatenated into a @@ -2780,31 +2983,34 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (firstOutputShape == null) { firstOutputShape = output.shape(); } else { - //Validate that shapes are the same (may not be, for some RNN variable length time series applications) + // Validate that shapes are the same (may not be, for some RNN variable length time series + // applications) long[] currShape = output.shape(); - Preconditions.checkState(firstOutputShape.length == currShape.length, - "Error during forward pass:" + - "different minibatches have different output array ranks - first minibatch shape %s, last minibatch shape %s", - firstOutputShape, currShape); - for (int i = 1; i < currShape.length; - i++) { //Skip checking minibatch dimension, fine if this varies - Preconditions.checkState(firstOutputShape[i] == currShape[i], - "Current output shape does not match first" + - " output array shape at position %s: all dimensions must match other than the first dimension.\n" - + - " For variable length output size/length use cases such as for RNNs with multiple sequence lengths," - + - " use one of the other (non iterator) output methods. First batch output shape: %s, current batch output shape: %s", - i, firstOutputShape, currShape); + Preconditions.checkState( + firstOutputShape.length == currShape.length, + "Error during forward pass:" + + "different minibatches have different output array ranks - first minibatch shape %s, last minibatch shape %s", + firstOutputShape, + currShape); + for (int i = 1; + i < currShape.length; + i++) { // Skip checking minibatch dimension, fine if this varies + Preconditions.checkState( + firstOutputShape[i] == currShape[i], + "Current output shape does not match first" + + " output array shape at position %s: all dimensions must match other than the first dimension.\n" + + " For variable length output size/length use cases such as for RNNs with multiple sequence lengths," + + " use one of the other (non iterator) output methods. First batch output shape: %s, current batch output shape: %s", + i, + firstOutputShape, + currShape); } } } return Nd4j.concat(0, outList.toArray(new INDArray[outList.size()])); } - /** - * Equivalent to {@link #output(DataSetIterator, boolean)} with train=false - */ + /** Equivalent to {@link #output(DataSetIterator, boolean)} with train=false */ public INDArray output(DataSetIterator iterator) { return output(iterator, false); } @@ -2812,7 +3018,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Perform inference and then calculate the F1 score of the output(input) vs. the labels. 
* - * @param input the input to perform inference with + * @param input the input to perform inference with * @param labels the true labels * @return the score for the given input,label pairs */ @@ -2836,8 +3042,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Sets the input and labels and calculates the score (value of the output layer loss function - * plus l1/l2 if applicable) for the prediction with respect to the true labels
This is - * equivalent to {@link #score(DataSet, boolean)} with training==false. + * plus l1/l2 if applicable) for the prediction with respect to the true labels
+ * This is equivalent to {@link #score(DataSet, boolean)} with training==false. * * @param data the data to score * @return the score for the given input,label pairs @@ -2851,10 +3057,10 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * Sets the input and labels and calculates the score (value of the output layer loss function * plus l1/l2 if applicable) for the prediction with respect to the true labels
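Illustrative aside (not part of the diffed sources): a small usage sketch of the scoring API documented here, assuming a trained MultiLayerNetwork and a labelled DataSet. Names are hypothetical.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.dataset.DataSet;

class ScoreSketch {
  static void printScores(MultiLayerNetwork net, DataSet ds) {
    // Test-time score: output layer loss plus any l1/l2 penalties
    double testScore = net.score(ds);
    // Training-time score: dropout/dropconnect (if configured) are applied
    double trainScore = net.score(ds, true);
    System.out.println("test=" + testScore + ", train=" + trainScore);
  }
}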
* - * @param data data to calculate score for + * @param data data to calculate score for * @param training If true: score during training. If false: score at test time. This can affect - * the application of certain features, such as dropout and dropconnect (which are - * applied at training time only) + * the application of certain features, such as dropout and dropconnect (which are applied at + * training time only) * @return the score (value of the loss function) */ public double score(DataSet data, boolean training) { @@ -2874,40 +3080,54 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (!(getOutputLayer() instanceof IOutputLayer)) { throw new IllegalStateException( - "Cannot calculate score if final layer is not an instance of IOutputLayer. " + - "Final layer is of type: " + getOutputLayer().getClass()); + "Cannot calculate score if final layer is not an instance of IOutputLayer. " + + "Final layer is of type: " + + getOutputLayer().getClass()); } - WorkspaceMode wsm = (training ? getNetConfiguration().getTrainingWorkspaceMode() - : getNetConfiguration().getInferenceWorkspaceMode()); + WorkspaceMode wsm = + (training + ? getNetConfiguration().getTrainingWorkspaceMode() + : getNetConfiguration().getInferenceWorkspaceMode()); LayerWorkspaceMgr mgr; if (wsm == WorkspaceMode.NONE) { mgr = LayerWorkspaceMgr.noWorkspaces(); } else { - mgr = LayerWorkspaceMgr.builder() - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - //TODO we can probably optimize this - .noWorkspaceFor(ArrayType.ACTIVATIONS) - .noWorkspaceFor(ArrayType.INPUT) - .build(); + mgr = + LayerWorkspaceMgr.builder() + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_FF_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + // TODO we can probably optimize this + .noWorkspaceFor(ArrayType.ACTIVATIONS) + .noWorkspaceFor(ArrayType.INPUT) + .build(); } mgr.setHelperWorkspacePointers(helperWorkspaces); - INDArray inputToOutputLayer = outputOfLayerDetached(training, FwdPassType.STANDARD, - layers.length - 2, data.getFeatures(), - data.getFeaturesMaskArray(), data.getLabelsMaskArray(), null); + INDArray inputToOutputLayer = + outputOfLayerDetached( + training, + FwdPassType.STANDARD, + layers.length - 2, + data.getFeatures(), + data.getFeaturesMaskArray(), + data.getLabelsMaskArray(), + null); if (data.getFeatures().size(0) > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); } IOutputLayer ol = (IOutputLayer) getOutputLayer(); if (getNetConfiguration().getInputPreProcess(layers.length - 1) != null) { - inputToOutputLayer = getNetConfiguration().getInputPreProcess(layers.length - 1) - .preProcess(inputToOutputLayer, (int) data.getFeatures().size(0), mgr); + inputToOutputLayer = + getNetConfiguration() + .getInputPreProcess(layers.length - 1) + .preProcess(inputToOutputLayer, (int) data.getFeatures().size(0), mgr); } - ol.setInput(inputToOutputLayer, mgr); //Feedforward doesn't include output layer for efficiency + ol.setInput(inputToOutputLayer, mgr); // Feedforward doesn't include output layer for efficiency ol.setLabels(data.getLabels()); double score; try (MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { @@ -2939,14 +3159,15 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * Calculate the score for each example 
in a DataSet individually. Unlike {@link #score(DataSet)} * and {@link #score(DataSet, boolean)} this method does not average/sum over examples. This * method allows for examples to be scored individually (at test time only), which may be useful - * for example for autoencoder architectures and the like.
Each row of the output (assuming - * addRegularizationTerms == true) is equivalent to calling score(DataSet) with a single example. + * for example for autoencoder architectures and the like.
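Illustrative aside (not part of the diffed sources): a sketch of per-example scoring as described here, assuming a trained network and a labelled DataSet; the returned column vector holds one loss value per example. Names are hypothetical.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;

class ScoreExamplesSketch {
  static INDArray perExampleLoss(MultiLayerNetwork net, DataSet ds) {
    // true: include l1/l2 regularization terms in each example's score
    return net.scoreExamples(ds, true);
  }
}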
+ * Each row of the output (assuming addRegularizationTerms == true) is equivalent to calling + * score(DataSet) with a single example. * - * @param data The data to score + * @param data The data to score * @param addRegularizationTerms If true: add l1/l2 regularization terms (if any) to the score. If - * false: don't add regularization terms + * false: don't add regularization terms * @return An INDArray (column vector) of size input.numRows(); the ith entry is the score (loss - * value) of the ith example + * value) of the ith example */ public INDArray scoreExamples(DataSet data, boolean addRegularizationTerms) { try { @@ -2958,13 +3179,19 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } private INDArray scoreExamplesHelper(DataSet data, boolean addRegularizationTerms) { - INDArray inputLast = outputOfLayerDetached(false, FwdPassType.STANDARD, layers.length - 2, - data.getFeatures(), - data.getFeaturesMaskArray(), data.getLabelsMaskArray(), null); + INDArray inputLast = + outputOfLayerDetached( + false, + FwdPassType.STANDARD, + layers.length - 2, + data.getFeatures(), + data.getFeaturesMaskArray(), + data.getLabelsMaskArray(), + null); setLabels(data.getLabels()); setLayerMaskArrays(data.getFeaturesMaskArray(), data.getLabelsMaskArray()); - //TODO we might want workspaces here? + // TODO we might want workspaces here? LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces(); INDArray out; @@ -2975,9 +3202,10 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (data.getFeatures().size(0) > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); } - inputLast = getNetConfiguration().getInputPreProcess(layers.length - 1) - .preProcess(inputLast, - (int) data.getFeatures().size(0), mgr); + inputLast = + getNetConfiguration() + .getInputPreProcess(layers.length - 1) + .preProcess(inputLast, (int) data.getFeatures().size(0), mgr); } ol.setLabels(data.getLabels()); ol.setInput(inputLast, mgr); @@ -3010,13 +3238,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * @return the score of the model (relative to the objective function) */ @Override - public double score() { + public double getScore() { return score; } - /** - * Intended for developer/internal use - */ + /** Intended for developer/internal use */ public void setScore(double score) { this.score = score; } @@ -3031,71 +3257,80 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (!(getOutputLayer() instanceof IOutputLayer)) { throw new DL4JException( "Cannot calculate gradient and score with respect to labels: final layer is not an IOutputLayer. " - + - "Final layer class: " + getOutputLayer().getClass() - + ". To calculate gradients and fit a network " + - "using backpropagation, the final layer must be an output layer"); + + "Final layer class: " + + getOutputLayer().getClass() + + ". To calculate gradients and fit a network " + + "using backpropagation, the final layer must be an output layer"); } - //Note: Workspace manager is only ose here for score calculation... other workspace managers are used in the + // Note: Workspace manager is only ose here for score calculation... 
other workspace managers + // are used in the // various FF/backprop methds LayerWorkspaceMgr mgr; if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { mgr = LayerWorkspaceMgr.noWorkspaces(); } else { - mgr = LayerWorkspaceMgr.builder() - .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) - .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .with(ArrayType.RNN_BP_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, - WS_RNN_LOOP_WORKING_MEM_CONFIG) - .build(); + mgr = + LayerWorkspaceMgr.builder() + .with(ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.ACTIVATIONS, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) + .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_FF_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .with( + ArrayType.RNN_BP_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM, + WS_RNN_LOOP_WORKING_MEM_CONFIG) + .build(); if (getNetConfiguration().getCacheMode() != null) { - //For now: store cache mode activations in activations workspace + // For now: store cache mode activations in activations workspace mgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); } } boolean tbptt = getNetConfiguration().getBackpropType() == BackpropType.TruncatedBPTT; - FwdPassType fwdType = (tbptt ? FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE - : FwdPassType.STANDARD); + FwdPassType fwdType = + (tbptt ? FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE : FwdPassType.STANDARD); synchronizeIterEpochCounts(); - //Calculate activations (which are stored in each layer, and used in backprop) + // Calculate activations (which are stored in each layer, and used in backprop) try (MemoryWorkspace ws = mgr.notifyScopeEntered(ArrayType.ACTIVATIONS)) { - //First: do a feed-forward through the network - //Note that we don't actually need to do the full forward pass through the output layer right now; but we do + // First: do a feed-forward through the network + // Note that we don't actually need to do the full forward pass through the output layer right + // now; but we do // need the input to the output layer to be set (such that backprop can be done) - List activations = ffToLayerActivationsInWs(layers.length - 2, fwdType, tbptt, - input, mask, null); + List activations = + ffToLayerActivationsInWs(layers.length - 2, fwdType, tbptt, input, mask, null); if (!trainingListeners.isEmpty()) { - //TODO: We possibly do want output layer activations in some cases here... + // TODO: We possibly do want output layer activations in some cases here... 
for (TrainingListener tl : trainingListeners) { tl.onForwardPass(this, activations); } } INDArray inputToOutputLayer = activations.get(activations.size() - 1); if (getNetConfiguration().getInputPreProcess(layers.length - 1) != null) { - inputToOutputLayer = getNetConfiguration().getInputPreProcess(layers.length - 1) - .preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr); - //Validate activations location + inputToOutputLayer = + getNetConfiguration() + .getInputPreProcess(layers.length - 1) + .preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr); + // Validate activations location } getOutputLayer().setInput(inputToOutputLayer, mgr); - //Then: compute gradients + // Then: compute gradients Pair pair = calcBackpropGradients(null, true, false, false); this.gradient = (pair == null ? null : pair.getFirst()); - //Calculate score + // Calculate score try (MemoryWorkspace wsFF = mgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { double r = calcRegularizationScore(true); score = ((IOutputLayer) getOutputLayer()).computeScore(r, true, mgr); } - //Listeners + // Listeners if (!trainingListeners.isEmpty()) { try (MemoryWorkspace workspace = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { for (TrainingListener tl : trainingListeners) { @@ -3105,13 +3340,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } - //Clear the post noise/dropconnect parameters on the output layer + // Clear the post noise/dropconnect parameters on the output layer getOutputLayer().clearNoiseWeightParams(); } - /** - * Clear the inputs. Clears optimizer state. - */ + /** Clear the inputs. Clears optimizer state. */ public void clear() { for (Layer layer : layers) { layer.clear(); @@ -3147,15 +3380,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return ret; } - - /** - * See {@link #setParams(INDArray)} - */ + /** See {@link #setParams(INDArray)} */ public void setParameters(INDArray params) { setParams(params); } - public INDArray getLabels() { return labels; } @@ -3215,9 +3444,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } public Layer getLayer(int i) { - Preconditions.checkArgument(i >= 0 && i < layers.length, - "Invalid layer index: layer index must be 0" + - " to %s (inclusive), got index %s", layers.length - 1, i); + Preconditions.checkArgument( + i >= 0 && i < layers.length, + "Invalid layer index: layer index must be 0" + " to %s (inclusive), got index %s", + layers.length - 1, + i); return layers[i]; } @@ -3268,19 +3499,18 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial throw new UnsupportedOperationException("Not supported"); } - //========== - //LayerConfiguration methods + // ========== + // LayerConfiguration methods @Override - public Pair feedForwardMaskArray(INDArray maskArray, - MaskState currentMaskState, - int minibatchSize) { + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { if (maskArray == null) { for (int i = 0; i < layers.length; i++) { layers[i].feedForwardMaskArray(null, null, minibatchSize); } } else { - //Do a forward pass through each preprocessor and layer + // Do a forward pass through each preprocessor and layer for (int i = 0; i < layers.length; i++) { InputPreProcessor preProcessor = getNetConfiguration().getInputPreProcess(i); @@ -3321,23 +3551,19 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return Type.MULTILAYER; } - /** - * 
Equivalent to {@link #output(INDArray)} using the input set via {@link #setInput(INDArray)} - */ + /** Equivalent to {@link #output(INDArray)} using the input set via {@link #setInput(INDArray)} */ public INDArray activate(TrainingMode training) { return output(input, training == TrainingMode.TRAIN); } - /** - * Equivalent to {@link #output(INDArray, TrainingMode)} - */ + /** Equivalent to {@link #output(INDArray, TrainingMode)} */ public INDArray activate(INDArray input, TrainingMode training) { return output(input, training == TrainingMode.TRAIN); } @Override - public Pair backpropGradient(INDArray epsilon, - LayerWorkspaceMgr workspaceMgr) { + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { if (getOutputLayer() instanceof IOutputLayer) { throw new UnsupportedOperationException( "Cannot calculate gradients based on epsilon with OutputLayer"); @@ -3408,7 +3634,6 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } // Update layerwise gradient view setBackpropGradientsViewArray(gradient.gradient()); - } @Override @@ -3445,50 +3670,61 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * If this MultiLayerNetwork contains one or more RNN layers: conduct forward pass (prediction) * but using previous stored state for any RNN layers. The activations for the final step are also - * stored in the RNN layers for use next time rnnTimeStep() is called.
This method can be used - * to generate output one or more steps at a time instead of always having to do forward pass from - * t=0. Example uses are for streaming data, and for generating samples from network output one - * step at a time (where samples are then fed back into the network as input)
If no previous - * state is present in RNN layers (i.e., initially or after calling rnnClearPreviousState()), the - * default initialization (usually 0) is used.
Supports mini-batch (i.e., multiple - * predictions/forward pass in parallel) as well as for single examples.
+ * stored in the RNN layers for use next time rnnTimeStep() is called.
+ * This method can be used to generate output one or more steps at a time instead of always having + * to do forward pass from t=0. Example uses are for streaming data, and for generating samples + * from network output one step at a time (where samples are then fed back into the network as + * input)
+ * If no previous state is present in RNN layers (i.e., initially or after calling + * rnnClearPreviousState()), the default initialization (usually 0) is used.
+ * Supports mini-batch (i.e., multiple predictions/forward pass in parallel) as well as for single + * examples.
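Illustrative aside (not part of the diffed sources): a streaming-inference sketch of rnnTimeStep as described above, assuming a trained recurrent MultiLayerNetwork and input size nIn; Nd4j.rand stands in for whatever produces the real [miniBatchSize, inputSize, 1] step data. Names are hypothetical.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

class RnnTimeStepSketch {
  static void streamSteps(MultiLayerNetwork net, int nIn, int numSteps) {
    net.rnnClearPreviousState();                        // start from the default (zero) state
    for (int t = 0; t < numSteps; t++) {
      INDArray step = Nd4j.rand(new int[]{1, nIn, 1});  // single example, single time step
      INDArray out = net.rnnTimeStep(step);             // stored state carries over to the next call
      System.out.println("step " + t + " output shape: " + java.util.Arrays.toString(out.shape()));
    }
  }
}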
* * @param input Input to network. May be for one or multiple time steps. For single time step: - * input has shape [miniBatchSize,inputSize] or [miniBatchSize,inputSize,1]. - * miniBatchSize=1 for single example.
For multiple time steps: - * [miniBatchSize,inputSize,inputTimeSeriesLength] + * input has shape [miniBatchSize,inputSize] or [miniBatchSize,inputSize,1]. miniBatchSize=1 + * for single example.
+ * For multiple time steps: [miniBatchSize,inputSize,inputTimeSeriesLength] * @return Output activations. If output is RNN layer (such as RnnOutputLayer): if input has shape - * [miniBatchSize,inputSize] i.e., is 2d, output has shape [miniBatchSize,outputSize] (i.e., also - * 2d).
Otherwise output is 3d [miniBatchSize,outputSize,inputTimeSeriesLength] when using - * RnnOutputLayer. + * [miniBatchSize,inputSize] i.e., is 2d, output has shape [miniBatchSize,outputSize] (i.e., + * also 2d).
+ * Otherwise output is 3d [miniBatchSize,outputSize,inputTimeSeriesLength] when using + * RnnOutputLayer. * @see #rnnTimeStep(INDArray, MemoryWorkspace) For outputting the activations in the specified - * workspace + * workspace */ public INDArray rnnTimeStep(INDArray input) { return rnnTimeStep(input, null); } /** - * See {@link #rnnTimeStep(INDArray)} for details
If no memory workspace is provided, the - * output will be detached (not in any workspace).
If a memory workspace is provided, the - * output activation array (i.e., the INDArray returned by this method) will be placed in the - * specified workspace. This workspace must be opened by the user before calling this method - and - * the user is responsible for (a) closing this workspace, and (b) ensuring the output array is - * not used out of scope (i.e., not used after closing the workspace to which it belongs - as this - * is likely to cause either an exception when used, or a crash). + * See {@link #rnnTimeStep(INDArray)} for details
+ * If no memory workspace is provided, the output will be detached (not in any workspace).
+ * If a memory workspace is provided, the output activation array (i.e., the INDArray returned by + * this method) will be placed in the specified workspace. This workspace must be opened by the + * user before calling this method - and the user is responsible for (a) closing this workspace, + * and (b) ensuring the output array is not used out of scope (i.e., not used after closing the + * workspace to which it belongs - as this is likely to cause either an exception when used, or a + * crash). * - * @param input Input activations + * @param input Input activations * @param outputWorkspace Output workspace. May be null * @return The output/activations from the network (either detached or in the specified workspace - * if provided) + * if provided) */ public INDArray rnnTimeStep(INDArray input, MemoryWorkspace outputWorkspace) { try { boolean inputIs2d = input.rank() == 2; - INDArray out = outputOfLayerDetached(false, FwdPassType.RNN_TIMESTEP, layers.length - 1, - input, null, null, outputWorkspace); + INDArray out = + outputOfLayerDetached( + false, + FwdPassType.RNN_TIMESTEP, + layers.length - 1, + input, + null, + null, + outputWorkspace); if (inputIs2d && out.rank() == 3 && layers[layers.length - 1].type() == Type.RECURRENT) { - //Return 2d output with shape [miniBatchSize,nOut] + // Return 2d output with shape [miniBatchSize,nOut] // instead of 3d output with shape [miniBatchSize,nOut,1] return out.tensorAlongDimension(0, 1, 0); } @@ -3540,9 +3776,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial r.rnnSetPreviousState(state); } - /** - * Clear the previous state of the RNN layers (if any). - */ + /** Clear the previous state of the RNN layers (if any). */ public void rnnClearPreviousState() { if (layers == null) { return; @@ -3560,21 +3794,28 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Similar to rnnTimeStep and feedForward() methods. Difference here is that this method:
(a) - * like rnnTimeStep does forward pass using stored state for RNN layers, and
(b) unlike - * rnnTimeStep does not modify the RNN layer state
Therefore multiple calls to this method - * with the same input should have the same output.
Typically used during training only. Use - * rnnTimeStep for prediction/forward pass at test time. + * Similar to rnnTimeStep and feedForward() methods. Difference here is that this method:
+ * (a) like rnnTimeStep does forward pass using stored state for RNN layers, and
+ * (b) unlike rnnTimeStep does not modify the RNN layer state
+ * Therefore multiple calls to this method with the same input should have the same output.
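Illustrative aside (not part of the diffed sources): a sketch of the read-only variant described here, assuming a trained recurrent network and a 3d input array; unlike rnnTimeStep, the stored RNN state is left untouched, so repeated calls return the same activations. Names are hypothetical.

import java.util.List;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;

class RnnActivateSketch {
  static List<INDArray> activateWithoutStateUpdate(MultiLayerNetwork net, INDArray input3d) {
    // training=false, storeLastForTBPTT=false: forward pass over stored state, no state update
    return net.rnnActivateUsingStoredState(input3d, false, false);
  }
}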
+ * Typically used during training only. Use rnnTimeStep for prediction/forward pass at test time. * - * @param input Input to network - * @param training Whether training or not + * @param input Input to network + * @param training Whether training or not * @param storeLastForTBPTT set to true if used as part of truncated BPTT training * @return Activations for each layer (including input, as per feedforward() etc) */ - public List rnnActivateUsingStoredState(INDArray input, boolean training, - boolean storeLastForTBPTT) { - return ffToLayerActivationsDetached(training, FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE, - storeLastForTBPTT, layers.length - 1, input, mask, null, false); + public List rnnActivateUsingStoredState( + INDArray input, boolean training, boolean storeLastForTBPTT) { + return ffToLayerActivationsDetached( + training, + FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE, + storeLastForTBPTT, + layers.length - 1, + input, + mask, + null, + false); } /** @@ -3586,12 +3827,15 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return getUpdater(true); } - /** - * Set the updater for the MultiLayerNetwork - */ + /** Set the updater for the MultiLayerNetwork */ public void setUpdater(Updater updater) { if (solver == null) { - solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this).build(); + solver = + new Solver.Builder() + .configure(getNetConfiguration()) + .listeners(this.getTrainingListeners()) + .model(this) + .build(); } solver.getOptimizer().setUpdater(updater); } @@ -3599,9 +3843,13 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial public Updater getUpdater(boolean initializeIfReq) { if (solver == null && initializeIfReq) { synchronized (this) { - if (solver == null) { //May have been created while waiting for lock - solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) - .build(); + if (solver == null) { // May have been created while waiting for lock + solver = + new Solver.Builder() + .configure(getNetConfiguration()) + .listeners(this.getTrainingListeners()) + .model(this) + .build(); solver.getOptimizer().setUpdater(UpdaterCreator.getUpdater(this)); } } @@ -3615,17 +3863,17 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Set the mask arrays for features and labels. Mask arrays are typically used in situations such * as one-to-many and many-to-one learning with recurrent neural networks, as well as for - * supporting time series of varying lengths within the same minibatch.
For example, with RNN - * data sets with input of shape [miniBatchSize,nIn,timeSeriesLength] and outputs of shape - * [miniBatchSize,nOut,timeSeriesLength], the features and mask arrays will have shape - * [miniBatchSize,timeSeriesLength] and contain values 0 or 1 at each element (to specify whether - * a given input/example is present - or merely padding - at a given time step).
- * NOTE: This method is not usually used directly. Instead, methods such as - * {@link #feedForward(INDArray, INDArray, INDArray)} and - * {@link #output(INDArray, boolean, INDArray, INDArray)} handle setting of masking internally. + * supporting time series of varying lengths within the same minibatch.
+ * For example, with RNN data sets with input of shape [miniBatchSize,nIn,timeSeriesLength] and + * outputs of shape [miniBatchSize,nOut,timeSeriesLength], the features and mask arrays will have + * shape [miniBatchSize,timeSeriesLength] and contain values 0 or 1 at each element (to specify + * whether a given input/example is present - or merely padding - at a given time step).
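Illustrative aside (not part of the diffed sources): a sketch of the masking convention described above, assuming sequences padded to tsLength and a caller-built feature mask (1 = real step, 0 = padding); as the NOTE below says, the output(...) overloads that take mask arrays usually handle this internally. Names are hypothetical.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

class MaskingSketch {
  static INDArray maskedOutput(MultiLayerNetwork net, INDArray features, int miniBatch, int tsLength) {
    INDArray featuresMask = Nd4j.ones(miniBatch, tsLength); // set padded steps to 0 per example
    net.setLayerMaskArrays(featuresMask, null);             // no labels mask needed for inference
    INDArray out = net.output(features, false);
    net.clearLayerMaskArrays();                             // avoid leaking masks into later calls
    return out;
  }
}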
+ * NOTE: This method is not usually used directly. Instead, methods such as {@link + * #feedForward(INDArray, INDArray, INDArray)} and {@link #output(INDArray, boolean, INDArray, + * INDArray)} handle setting of masking internally. * * @param featuresMaskArray Mask array for features (input) - * @param labelsMaskArray Mask array for labels (output) + * @param labelsMaskArray Mask array for labels (output) * @see #clearLayerMaskArrays() */ public void setLayerMaskArrays(INDArray featuresMaskArray, INDArray labelsMaskArray) { @@ -3634,29 +3882,28 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (featuresMaskArray.size(0) > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); } - //New approach: use feedForwardMaskArray method + // New approach: use feedForwardMaskArray method feedForwardMaskArray(featuresMaskArray, MaskState.Active, (int) featuresMaskArray.size(0)); + /* + //feedforward layers below a RNN layer: need the input (features) mask array + //Reason: even if the time series input is zero padded, the output from the dense layers are + // non-zero (i.e., activationFunction(0*weights + bias) != 0 in general) + //This assumes that the time series input is masked - i.e., values are 0 at the padded time steps, + // so we don't need to do anything for the recurrent layer - /* - //feedforward layers below a RNN layer: need the input (features) mask array - //Reason: even if the time series input is zero padded, the output from the dense layers are - // non-zero (i.e., activationFunction(0*weights + bias) != 0 in general) - //This assumes that the time series input is masked - i.e., values are 0 at the padded time steps, - // so we don't need to do anything for the recurrent layer + //Now, if mask array is 2d -> need to reshape to 1d (column vector) in the exact same order + // as is done for 3d -> 2d time series reshaping + INDArray reshapedFeaturesMask = TimeSeriesUtils.reshapeTimeSeriesMaskToVector(featuresMaskArray); - //Now, if mask array is 2d -> need to reshape to 1d (column vector) in the exact same order - // as is done for 3d -> 2d time series reshaping - INDArray reshapedFeaturesMask = TimeSeriesUtils.reshapeTimeSeriesMaskToVector(featuresMaskArray); + for( int i=0; i See {@link #setLayerMaskArrays(INDArray, INDArray)} - * for details on mask arrays. + * Remove the mask arrays from all layers.
+ * See {@link #setLayerMaskArrays(INDArray, INDArray)} for details on mask arrays. */ public void clearLayerMaskArrays() { for (Layer layer : layers) { @@ -3720,7 +3967,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * @deprecated To be removed - use {@link #evaluateROC(DataSetIterator, int)} to enforce selection - * of appropriate ROC/threshold configuration + * of appropriate ROC/threshold configuration */ @Deprecated public T evaluateROC(DataSetIterator iterator) { @@ -3731,23 +3978,23 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * Evaluate the network (must be a binary classifier) on the specified data, using the {@link ROC} * class * - * @param iterator Data to evaluate on + * @param iterator Data to evaluate on * @param rocThresholdSteps Number of threshold steps to use with {@link ROC} - see that class for - * details. + * details. * @return ROC evaluation on the given dataset */ public T evaluateROC(DataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(); if (getNetConfiguration().isValidateOutputLayerConfig()) { - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), - ROC.class); + OutputLayerUtil.validateOutputLayerForClassifierEvaluation( + outputLayer.getLayerConfiguration(), ROC.class); } return (T) doEvaluation(iterator, new org.deeplearning4j.eval.ROC(rocThresholdSteps))[0]; } /** * @deprecated To be removed - use {@link #evaluateROCMultiClass(DataSetIterator, int)} to enforce - * selection of appropriate ROC/threshold configuration + * selection of appropriate ROC/threshold configuration */ @Deprecated public T evaluateROCMultiClass(DataSetIterator iterator) { @@ -3757,19 +4004,19 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Evaluate the network on the specified data, using the {@link ROCMultiClass} class * - * @param iterator Data to evaluate on + * @param iterator Data to evaluate on * @param rocThresholdSteps Number of threshold steps to use with {@link ROCMultiClass} * @return Multi-class ROC evaluation on the given dataset */ - public T evaluateROCMultiClass(DataSetIterator iterator, - int rocThresholdSteps) { + public T evaluateROCMultiClass( + DataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(); if (getNetConfiguration().isValidateOutputLayerConfig()) { - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), - ROCMultiClass.class); + OutputLayerUtil.validateOutputLayerForClassifierEvaluation( + outputLayer.getLayerConfiguration(), ROCMultiClass.class); } - return (T) doEvaluation(iterator, - new org.deeplearning4j.eval.ROCMultiClass(rocThresholdSteps))[0]; + return (T) + doEvaluation(iterator, new org.deeplearning4j.eval.ROCMultiClass(rocThresholdSteps))[0]; } /** @@ -3786,8 +4033,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } - public T[] doEvaluationHelper(DataSetIterator iterator, - T... evaluations) { + public T[] doEvaluationHelper( + DataSetIterator iterator, T... evaluations) { if (!iterator.hasNext() && iterator.resetSupported()) { iterator.reset(); } @@ -3796,22 +4043,26 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial iterator.asyncSupported() ? 
new AsyncDataSetIterator(iterator, 2, true) : iterator; WorkspaceMode cMode = getNetConfiguration().getTrainingWorkspaceMode(); - getNetConfiguration().setTrainingWorkspaceMode( - getNetConfiguration().getInferenceWorkspaceMode()); + getNetConfiguration() + .setTrainingWorkspaceMode(getNetConfiguration().getInferenceWorkspaceMode()); - //First: let's determine if we should do 'split feed forward' for long time series - //The idea: RNN 20k time steps. Train using TBPTT length 100 -> 200 segments of length 100. If we naively - // just use .output(INDArray) here, then our memory requirements are 200x larger than if we did the same + // First: let's determine if we should do 'split feed forward' for long time series + // The idea: RNN 20k time steps. Train using TBPTT length 100 -> 200 segments of length 100. If + // we naively + // just use .output(INDArray) here, then our memory requirements are 200x larger than if we did + // the same // evaluation in segments... - //Only do this if TBPTT is enabled - if not, it means we can train without TBPTT and hence should be able + // Only do this if TBPTT is enabled - if not, it means we can train without TBPTT and hence + // should be able // to test without splitting also - boolean useRnnSegments = (getNetConfiguration().getBackpropType() - == BackpropType.TruncatedBPTT); + boolean useRnnSegments = + (getNetConfiguration().getBackpropType() == BackpropType.TruncatedBPTT); MemoryWorkspace outputWs; if (getNetConfiguration().getInferenceWorkspaceMode() == WorkspaceMode.ENABLED) { - outputWs = Nd4j.getWorkspaceManager() - .getWorkspaceForCurrentThread(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM); + outputWs = + Nd4j.getWorkspaceManager() + .getWorkspaceForCurrentThread(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM); } else { outputWs = new DummyWorkspace(); } @@ -3830,10 +4081,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial List meta = next.getExampleMetaData(); if (!useRnnSegments) { - //Standard/non-RNN case: + // Standard/non-RNN case: try (MemoryWorkspace ws = outputWs.notifyScopeEntered()) { - INDArray out = outputOfLayerDetached(false, FwdPassType.STANDARD, layers.length - 1, - features, fMask, lMask, ws); + INDArray out = + outputOfLayerDetached( + false, FwdPassType.STANDARD, layers.length - 1, features, fMask, lMask, ws); try (MemoryWorkspace wsO = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { for (T evaluation : evaluations) { @@ -3844,12 +4096,13 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } else { rnnClearPreviousState(); - //Get subset of features and labels: + // Get subset of features and labels: val fwdLen = getNetConfiguration().getTbpttFwdLength(); val tsLength = features.size(2); long nSubsets = tsLength / fwdLen; if (tsLength % fwdLen != 0) { - nSubsets++; //Example: 100 fwdLen with timeSeriesLength=120 -> want 2 subsets (1 of size 100, 1 of size 20) + nSubsets++; // Example: 100 fwdLen with timeSeriesLength=120 -> want 2 subsets (1 of size + // 100, 1 of size 20) } for (int i = 0; i < nSubsets; i++) { val startTimeIdx = i * fwdLen; @@ -3858,8 +4111,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (endTimeIdx > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); } - INDArray[] subsets = getSubsetsForTbptt(startTimeIdx, (int) endTimeIdx, features, labels, - fMask, lMask); + INDArray[] subsets = + getSubsetsForTbptt(startTimeIdx, (int) endTimeIdx, features, labels, fMask, lMask); setLayerMaskArrays(subsets[2], 
subsets[3]); @@ -3874,7 +4127,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } - //Clear inputs, masks etc. Important to avoid leaking invalidated/out of scope arrays between iterations + // Clear inputs, masks etc. Important to avoid leaking invalidated/out of scope arrays between + // iterations clearLayersStates(); } @@ -3893,7 +4147,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * * @param iterator Data to undertake evaluation on * @return Evaluation object, summarizing the results of the evaluation on the provided - * DataSetIterator + * DataSetIterator */ public Evaluation evaluate(DataSetIterator iterator, List labelsList) { return evaluate(iterator, labelsList, 1); @@ -3924,8 +4178,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial fit(ds); } else { throw new DL4JInvalidInputException( - "MultiLayerNetwork can't handle MultiDataSet with more than 1 features or labels array." + - "Please consider use of ComputationGraph"); + "MultiLayerNetwork can't handle MultiDataSet with more than 1 features or labels array." + + "Please consider use of ComputationGraph"); } } @@ -3934,15 +4188,16 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * number of epochs. Equvalent to calling {@link #fit(MultiDataSetIterator)} numEpochs times in a * loop * - * @param iterator Training data (DataSetIterator). Iterator must support resetting + * @param iterator Training data (DataSetIterator). Iterator must support resetting * @param numEpochs Number of training epochs, >= 1 */ public void fit(@NonNull MultiDataSetIterator iterator, int numEpochs) { - Preconditions.checkArgument(numEpochs > 0, "Number of epochs much be > 0. Got numEpochs = %s", - numEpochs); - Preconditions.checkArgument(numEpochs == 1 || iterator.resetSupported(), - "Cannot perform multiple epochs training using" + - "iterator has does not support resetting (iterator.resetSupported() returned false)"); + Preconditions.checkArgument( + numEpochs > 0, "Number of epochs much be > 0. Got numEpochs = %s", numEpochs); + Preconditions.checkArgument( + numEpochs == 1 || iterator.resetSupported(), + "Cannot perform multiple epochs training using" + + "iterator has does not support resetting (iterator.resetSupported() returned false)"); for (int i = 0; i < numEpochs; i++) { fit(iterator); @@ -3950,9 +4205,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Perform minibatch training on all minibatches in the MultiDataSetIterator.
Note: The - * MultiDataSets in the MultiDataSetIterator must have exactly 1 input and output array (as - * MultiLayerNetwork only supports 1 input and 1 output) + * Perform minibatch training on all minibatches in the MultiDataSetIterator.
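Illustrative aside (not part of the diffed sources): a sketch of multi-epoch fitting via the MultiDataSetIterator overload shown here, assuming a resettable iterator; names are hypothetical.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;

class FitSketch {
  static void train(MultiLayerNetwork net, MultiDataSetIterator mdsIter, int numEpochs) {
    // Each MultiDataSet must have exactly one feature and one label array (see the note below);
    // the iterator must support reset() when numEpochs > 1.
    net.fit(mdsIter, numEpochs);
  }
}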
+ * Note: The MultiDataSets in the MultiDataSetIterator must have exactly 1 input and output array + * (as MultiLayerNetwork only supports 1 input and 1 output) * * @param iterator Training data (DataSetIterator). Iterator must support resetting */ @@ -3970,11 +4225,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * Evaluate the network (for classification) on the provided data set, with top N accuracy in * addition to standard accuracy. For 'standard' accuracy evaluation only, use topN = 1 * - * @param iterator Iterator (data) to evaluate on + * @param iterator Iterator (data) to evaluate on * @param labelsList List of labels. May be null. - * @param topN N value for top N accuracy evaluation + * @param topN N value for top N accuracy evaluation * @return Evaluation object, summarizing the results of the evaluation on the provided - * DataSetIterator + * DataSetIterator */ public Evaluation evaluate(DataSetIterator iterator, List labelsList, int topN) { if (layers == null || !(getOutputLayer() instanceof IOutputLayer)) { @@ -3984,13 +4239,13 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial try { labelsList = iterator.getLabels(); } catch (Throwable t) { - } //Ignore, maybe UnsupportedOperationException etc + } // Ignore, maybe UnsupportedOperationException etc } Layer outputLayer = getOutputLayer(); if (getNetConfiguration().isValidateOutputLayerConfig()) { - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), - Evaluation.class); + OutputLayerUtil.validateOutputLayerForClassifierEvaluation( + outputLayer.getLayerConfiguration(), Evaluation.class); } Evaluation e = new org.deeplearning4j.eval.Evaluation(labelsList, topN); @@ -4036,10 +4291,17 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial List lines = new ArrayList<>(); if (inputType == null) { - lines.add(new String[]{"LayerName (LayerType)", "nIn,nOut", "TotalParams", "ParamsShape"}); + lines.add(new String[] {"LayerName (LayerType)", "nIn,nOut", "TotalParams", "ParamsShape"}); } else { - lines.add(new String[]{"LayerName (LayerType)", "nIn,nOut", "TotalParams", "ParamsShape", - "InputShape", "OutputShape"}); + lines.add( + new String[] { + "LayerName (LayerType)", + "nIn,nOut", + "TotalParams", + "ParamsShape", + "InputShape", + "OutputShape" + }); } int[] maxLength = new int[inputType == null ? 
4 : 6]; String[] header = lines.get(0); @@ -4070,8 +4332,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial inputType = preProcessor.getOutputType(inputType); inShape += "--> " + inputType.toString(); } - outType = currentLayer.getLayerConfiguration() - .getOutputType(currentLayer.getIndex(), inputType); + outType = + currentLayer.getLayerConfiguration().getOutputType(currentLayer.getIndex(), inputType); outShape = outType.toString(); inputType = outType; } @@ -4084,8 +4346,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } else { try { in = String.valueOf(((FeedForwardLayer) currentLayer.getLayerConfiguration()).getNIn()); - out = String.valueOf( - ((FeedForwardLayer) currentLayer.getLayerConfiguration()).getNOut()); + out = + String.valueOf(((FeedForwardLayer) currentLayer.getLayerConfiguration()).getNOut()); } catch ( Exception e) { // Some layers, like PReLU, are just BaseLayers (but have parameters) } @@ -4099,17 +4361,24 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } if (currentLayer instanceof FrozenLayer) { frozenParams += currentLayer.numParams(); - classNameArr = ((FrozenLayer) currentLayer).getInsideLayer().getClass().getName() - .split("\\."); + classNameArr = + ((FrozenLayer) currentLayer).getInsideLayer().getClass().getName().split("\\."); className = "Frozen " + classNameArr[classNameArr.length - 1]; } String[] line; if (inputType == null) { - line = new String[]{name + " (" + className + ")", in + "," + out, paramCount, paramShape}; + line = new String[] {name + " (" + className + ")", in + "," + out, paramCount, paramShape}; } else { - line = new String[]{name + " (" + className + ")", in + "," + out, paramCount, paramShape, - inShape, outShape}; + line = + new String[] { + name + " (" + className + ")", + in + "," + out, + paramCount, + paramShape, + inShape, + outShape + }; } for (int i = 0; i < line.length; i++) { maxLength[i] = Math.max(maxLength[i], line[i] == null ? 0 : line[i].length()); @@ -4133,8 +4402,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial sbFormat.append("\n"); String format = sbFormat.toString(); - ret.append(StringUtils.repeat("=", totalLength)) - .append("\n"); + ret.append(StringUtils.repeat("=", totalLength)).append("\n"); boolean first = true; for (String[] line : lines) { @@ -4147,9 +4415,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } ret.append(StringUtils.repeat("-", totalLength)); - ret.append(String.format("\n%30s %,d", "Total Parameters: ", params().length())); + ret.append(String.format("\n%30s %,d", "Total Parameters: ", getModelParams().length())); ret.append( - String.format("\n%30s %,d", "Trainable Parameters: ", params().length() - frozenParams)); + String.format( + "\n%30s %,d", + "ITrainableLayer Parameters: ", getModelParams().length() - frozenParams)); ret.append(String.format("\n%30s %,d", "Frozen Parameters: ", frozenParams)); ret.append("\n"); ret.append(StringUtils.repeat("=", totalLength)); @@ -4162,9 +4432,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * minibatch size. Note that when using workspaces or CuDNN, the network should be trained for * some iterations so that the memory workspaces have time to initialize. Without this, the memory * requirements during training may be underestimated. - *

- * Note also that this is the same information that is generated during an OOM crash when training - * or performing inference. + * + *

Note also that this is the same information that is generated during an OOM crash when + * training or performing inference. * * @param minibatch Minibatch size to estimate memory for * @param inputType Input type to the network @@ -4174,9 +4444,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return CrashReportingUtil.generateMemoryStatus(this, minibatch, inputType); } - /** - * This method just makes sure there's no state preserved within layers - */ + /** This method just makes sure there's no state preserved within layers */ public void clearLayersStates() { for (Layer layer : layers) { layer.clear(); @@ -4186,14 +4454,15 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Increment the epoch count (in the underlying {@link NeuralNetConfiguration} by 1). Note that - * this is done automatically when using iterator-based fitting methods, such as - * {@link #fit(DataSetIterator)}. However, when using non-iterator fit methods (DataSet, + * this is done automatically when using iterator-based fitting methods, such as {@link + * #fit(DataSetIterator)}. However, when using non-iterator fit methods (DataSet, * INDArray/INDArray etc), the network has no way to know when one epoch ends and another starts. - * In such situations, this method can be used to increment the epoch counter.
Note that the - * epoch counter is used for situations such as some learning rate schedules, and the like. - *

- * The current epoch count can be obtained using - * {@code NeuralNetConfiguration.getLayerwiseConfiguration().getEpochCount()} + * In such situations, this method can be used to increment the epoch counter.
+ * Note that the epoch counter is used for situations such as some learning rate schedules, and + * the like. + * + *
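Illustrative aside (not part of the diffed sources): a sketch of manual epoch counting when fitting with raw arrays rather than an iterator, assuming feature and label INDArrays; names are hypothetical.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;

class EpochCountSketch {
  static void trainEpochs(MultiLayerNetwork net, INDArray features, INDArray labels, int numEpochs) {
    for (int epoch = 0; epoch < numEpochs; epoch++) {
      net.fit(features, labels);  // non-iterator fit: the network cannot detect epoch boundaries
      net.incrementEpochCount();  // so advance the counter manually (used by LR schedules etc.)
    }
  }
}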

The current epoch count can be obtained using {@code + * NeuralNetConfiguration.getLayerwiseConfiguration().getEpochCount()} */ public void incrementEpochCount() { getNetConfiguration().setEpochCount(getNetConfiguration().getEpochCount() + 1); @@ -4201,7 +4470,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } protected void synchronizeIterEpochCounts() { - //TODO: this is necessary for some schedules - but the redundant values are a little ugly... + // TODO: this is necessary for some schedules - but the redundant values are a little ugly... int currIter = getIterationCount(); int currEpoch = getEpochCount(); for (Layer l : layers) { @@ -4226,9 +4495,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Save the MultiLayerNetwork to a file. Restore using {@link #load(File, boolean)}. * - * @param f File to save the network to + * @param f File to save the network to * @param saveUpdater If true: save the updater (i.e., the state array for momentum/Adam/rmsprop - * etc), which should usually be saved if further training is required + * etc), which should usually be saved if further training is required * @see ModelSerializer ModelSerializer for more details (and saving/loading via streams) * @see #save(File, boolean) */ @@ -4255,14 +4524,16 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * @return The network, set to use the specified datatype for the parameters and activations */ public MultiLayerNetwork convertDataType(@NonNull DataType dataType) { - Preconditions.checkState(dataType.isFPType(), - "Invalid DataType: %s. Can only convert network to a floating point type", dataType); - if (dataType == params().dataType()) { + Preconditions.checkState( + dataType.isFPType(), + "Invalid DataType: %s. Can only convert network to a floating point type", + dataType); + if (dataType == getModelParams().dataType()) { return this; } try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - INDArray newParams = params().castTo(dataType); + INDArray newParams = getModelParams().castTo(dataType); String jsonConfig = getNetConfiguration().toJson(); NeuralNetConfiguration newConf = NeuralNetConfiguration.fromJson(jsonConfig); newConf.setDataType(dataType); @@ -4297,9 +4568,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Set the learning rate schedule for all layers in the network to the specified schedule. This * schedule will replace any/all existing schedules, and also any fixed learning rate values.
- * Note that the iteration/epoch counts will not be reset. Use - * {@link NeuralNetConfiguration#setIterationCount(int)} and - * {@link NeuralNetConfiguration#setEpochCount(int)} if this is required + * Note that the iteration/epoch counts will not be reset. Use {@link + * NeuralNetConfiguration#setIterationCount(int)} and {@link + * NeuralNetConfiguration#setEpochCount(int)} if this is required * * @param newLr New learning rate schedule for all layers * @see #setLearningRate(ISchedule) @@ -4320,7 +4591,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * to be set to a new LR * * @param layerNumber Number of the layer to set the LR for - * @param newLr New learning rate for a single layer + * @param newLr New learning rate for a single layer * @see #setLearningRate(ISchedule) * @see #setLearningRate(int, double) */ @@ -4331,13 +4602,15 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Set the learning rate schedule for a single layer in the network to the specified value.
* Note also that {@link #setLearningRate(ISchedule)} should also be used in preference, when all - * layers need to be set to a new LR schedule.
This schedule will replace any/all existing - * schedules, and also any fixed learning rate values.
Note also that the iteration/epoch - * counts will not be reset. Use {@link NeuralNetConfiguration#setIterationCount(int)} and - * {@link NeuralNetConfiguration#setEpochCount(int)} if this is required + * layers need to be set to a new LR schedule.
+ * This schedule will replace any/all existing schedules, and also any fixed learning rate values. + *
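Illustrative aside (not part of the diffed sources): a sketch of the per-layer learning-rate setter referenced here, assuming a network with at least two layers; the values are arbitrary and the helper is hypothetical.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;

class LearningRateSketch {
  static void adjustLayerLr(MultiLayerNetwork net) {
    // Per-layer override, as described for setLearningRate(int, double);
    // setLearningRate(ISchedule) is preferred when every layer should change.
    net.setLearningRate(1, 1e-4); // layer index 1, new fixed learning rate
  }
}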
+ * Note also that the iteration/epoch counts will not be reset. Use {@link + * NeuralNetConfiguration#setIterationCount(int)} and {@link + * NeuralNetConfiguration#setEpochCount(int)} if this is required * * @param layerNumber Number of the layer to set the LR schedule for - * @param newLr New learning rate for a single layer + * @param newLr New learning rate for a single layer * @see #setLearningRate(ISchedule) * @see #setLearningRate(int, double) */ @@ -4358,11 +4631,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Return the layer size (number of units) for the specified layer.
Note that the meaning of - * the "layer size" can depend on the type of layer. For example:
- DenseLayer, OutputLayer, - * recurrent layers: number of units (nOut configuration option)
- ConvolutionLayer: the - * channels (number of channels)
- Subsampling layers, global pooling layers, etc: size of 0 - * is always returned
+ * Return the layer size (number of units) for the specified layer.
+ * Note that the meaning of the "layer size" can depend on the type of layer. For example:
+ * - DenseLayer, OutputLayer, recurrent layers: number of units (nOut configuration option)
+ * - ConvolutionLayer: the channels (number of channels)
+ * - Subsampling layers, global pooling layers, etc: size of 0 is always returned
* * @param layer Index of the layer to get the size of. Must be in range 0 to nLayers-1 inclusive * @return Size of the layer @@ -4370,8 +4643,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial public int layerSize(int layer) { if (layer < 0 || layer > layers.length) { throw new IllegalArgumentException( - "Invalid layer index: " + layer + ". LayerConfiguration index must be between 0 and " - + (layers.length - 1) + " inclusive"); + "Invalid layer index: " + + layer + + ". LayerConfiguration index must be between 0 and " + + (layers.length - 1) + + " inclusive"); } LayerConfiguration conf = layers[layer].getLayerConfiguration(); if (conf == null || !(conf instanceof FeedForwardLayer)) { @@ -4386,12 +4662,12 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Return the input size (number of inputs) for the specified layer.
Note that the meaning of - * the "input size" can depend on the type of layer. For example:
- DenseLayer, OutputLayer, - * etc: the feature vector size (nIn configuration option)
- Recurrent layers: the feature - * vector size per time step (nIn configuration option)
- ConvolutionLayer: the - * channels (number of channels)
- Subsampling layers, global pooling layers, etc: size of 0 - * is always returned
+ * Return the input size (number of inputs) for the specified layer.
+ * Note that the meaning of the "input size" can depend on the type of layer. For example:
+ * - DenseLayer, OutputLayer, etc: the feature vector size (nIn configuration option)
+ * - Recurrent layers: the feature vector size per time step (nIn configuration option)
+ * - ConvolutionLayer: the channels (number of channels)
+ * - Subsampling layers, global pooling layers, etc: size of 0 is always returned
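Illustrative aside (not part of the diffed sources): a sketch querying the sizes described in the two lists above, assuming a trained MultiLayerNetwork; names are hypothetical.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;

class LayerSizeSketch {
  static void printSizes(MultiLayerNetwork net) {
    for (int i = 0; i < net.getLayers().length; i++) {
      // 0 is reported for layers without nIn/nOut (subsampling, global pooling, ...)
      System.out.println("layer " + i + ": nIn=" + net.layerInputSize(i)
          + ", nOut=" + net.layerSize(i));
    }
  }
}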
   *
   * @param layer Index of the layer to get the size of. Must be in range 0 to nLayers-1 inclusive
   * @return Size of the layer
@@ -4399,8 +4675,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial
   public int layerInputSize(int layer) {
     if (layer < 0 || layer > layers.length) {
       throw new IllegalArgumentException(
-          "Invalid layer index: " + layer + ". LayerConfiguration index must be between 0 and "
-              + (layers.length - 1) + " inclusive");
+          "Invalid layer index: "
+              + layer
+              + ". LayerConfiguration index must be between 0 and "
+              + (layers.length - 1)
+              + " inclusive");
     }
     LayerConfiguration conf = layers[layer].getLayerConfiguration();
     if (conf == null || !(conf instanceof FeedForwardLayer)) {
@@ -4416,42 +4695,34 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial

   /**
    * Indicates whether some other object is "equal to" this one.
-   * <p>
-   * The {@code equals} method implements an equivalence relation on non-null object references:
+   *
+   * <p>The {@code equals} method implements an equivalence relation on non-null object references:
+   *
    * <ul>
-   * <li>It is reflexive: for any non-null reference value
-   *     {@code x}, {@code x.equals(x)} should return
-   *     {@code true}.
-   * <li>It is symmetric: for any non-null reference values
-   *     {@code x} and {@code y}, {@code x.equals(y)}
-   *     should return {@code true} if and only if
-   *     {@code y.equals(x)} returns {@code true}.
-   * <li>It is transitive: for any non-null reference values
-   *     {@code x}, {@code y}, and {@code z}, if
-   *     {@code x.equals(y)} returns {@code true} and
-   *     {@code y.equals(z)} returns {@code true}, then
-   *     {@code x.equals(z)} should return {@code true}.
-   * <li>It is consistent: for any non-null reference values
-   *     {@code x} and {@code y}, multiple invocations of
-   *     {@code x.equals(y)} consistently return {@code true}
-   *     or consistently return {@code false}, provided no
-   *     information used in {@code equals} comparisons on the
-   *     objects is modified.
-   * <li>For any non-null reference value {@code x},
-   *     {@code x.equals(null)} should return {@code false}.
+   *   <li>It is reflexive: for any non-null reference value {@code x}, {@code x.equals(x)}
+   *       should return {@code true}.
+   *   <li>It is symmetric: for any non-null reference values {@code x} and {@code y}, {@code
+   *       x.equals(y)} should return {@code true} if and only if {@code y.equals(x)} returns {@code
+   *       true}.
+   *   <li>It is transitive: for any non-null reference values {@code x}, {@code y}, and
+   *       {@code z}, if {@code x.equals(y)} returns {@code true} and {@code y.equals(z)} returns
+   *       {@code true}, then {@code x.equals(z)} should return {@code true}.
+   *   <li>It is consistent: for any non-null reference values {@code x} and {@code y},
+   *       multiple invocations of {@code x.equals(y)} consistently return {@code true} or
+   *       consistently return {@code false}, provided no information used in {@code equals}
+   *       comparisons on the objects is modified.
+   *   <li>For any non-null reference value {@code x}, {@code x.equals(null)} should return {@code
+   *       false}.
    * </ul>
-   * <p>
-   * The {@code equals} method for class {@code Object} implements
-   * the most discriminating possible equivalence relation on objects;
-   * that is, for any non-null reference values {@code x} and
-   * {@code y}, this method returns {@code true} if and only
-   * if {@code x} and {@code y} refer to the same object
-   * ({@code x == y} has the value {@code true}).
-   * <p>
-   * Note that it is generally necessary to override the {@code hashCode}
-   * method whenever this method is overridden, so as to maintain the
-   * general contract for the {@code hashCode} method, which states
-   * that equal objects must have equal hash codes.
+   *
+   * <p>The {@code equals} method for class {@code Object} implements the most discriminating
+   * possible equivalence relation on objects; that is, for any non-null reference values {@code x}
+   * and {@code y}, this method returns {@code true} if and only if {@code x} and {@code y} refer to
+   * the same object ({@code x == y} has the value {@code true}).
+   *
+   * <p>Note that it is generally necessary to override the {@code hashCode} method whenever this
+   * method is overridden, so as to maintain the general contract for the {@code hashCode} method,
+   * which states that equal objects must have equal hash codes.
    *
    * @param obj the reference object with which to compare.
    * @return {@code true} if this object is the same as the obj argument; {@code false} otherwise.
@@ -4465,9 +4736,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial
     }
     if (obj instanceof MultiLayerNetwork) {
       MultiLayerNetwork network = (MultiLayerNetwork) obj;
-      boolean paramsEquals = network.params().equals(params());
-      boolean confEquals = getNetConfiguration().equals(
-          network.getNetConfiguration());
+      boolean paramsEquals = network.getModelParams().equals(getModelParams());
+      boolean confEquals = getNetConfiguration().equals(network.getNetConfiguration());
       boolean updaterEquals = getUpdater().equals(network.getUpdater());
       return paramsEquals && confEquals && updaterEquals;
     }
@@ -4481,15 +4751,17 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial

   private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException {
     val mln = ModelSerializer.restoreMultiLayerNetwork(ois, true);
-    this.setNetConfiguration( mln.getNetConfiguration().clone() );
+    this.setNetConfiguration(mln.getNetConfiguration().clone());
     this.init();
     this.flattenedParams.assign(mln.flattenedParams);

-    int numWorkingMem = 2 * (getNetConfiguration().getFlattenedLayerConfigurations().size()
-        + getNetConfiguration().getInputPreProcessors().size());
+    int numWorkingMem =
+        2
+            * (getNetConfiguration().getFlattenedLayerConfigurations().size()
+                + getNetConfiguration().getInputPreProcessors().size());
     WS_LAYER_WORKING_MEM_CONFIG = getLayerWorkingMemWSConfig(numWorkingMem);
-    WS_LAYER_ACT_X_CONFIG = getLayerActivationWSConfig(
-        getNetConfiguration().getFlattenedLayerConfigurations().size());
+    WS_LAYER_ACT_X_CONFIG =
+        getLayerActivationWSConfig(getNetConfiguration().getFlattenedLayerConfigurations().size());

     if (mln.getUpdater() != null && mln.getUpdater(false).getStateViewArray() != null) {
       this.getUpdater(true).getStateViewArray().assign(mln.getUpdater(false).getStateViewArray());
@@ -4503,7 +4775,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial
    */
   @Override
   public void close() {
-    //Close the INDArray and dealloc
+    // Close the INDArray and dealloc
     if (flattenedParams.closeable()) {
       flattenedParams.close();
     }
@@ -4533,5 +4805,4 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial
   public String toString() {
     return getNetConfiguration().toString();
   }
-
 }
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java
index 3de33be57..32b05a04c 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java
@@ -22,9 +22,7 @@ package org.deeplearning4j.nn.params;

 import lombok.val;
 import org.deeplearning4j.nn.api.AbstractParamInitializer;
-import org.deeplearning4j.nn.api.ParamInitializer;
-import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.layers.BaseLayer;
+import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration;
 import
org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.PReLULayer; import org.deeplearning4j.nn.weights.IWeightInit; @@ -99,7 +97,7 @@ public class PReLUParamInitializer extends AbstractParamInitializer { @Override public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { - if (!(conf instanceof BaseLayer)) + if (!(conf instanceof BaseLayerConfiguration)) throw new IllegalArgumentException("unsupported layer type: " + conf.getClass().getName()); Map params = Collections.synchronizedMap(new LinkedHashMap()); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java index 7cb7059c8..5744e70ad 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java @@ -21,10 +21,8 @@ package org.deeplearning4j.nn.params; import org.deeplearning4j.nn.api.AbstractParamInitializer; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; +import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayerConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import java.util.List; @@ -99,8 +97,8 @@ public class WrapperLayerParamInitializer extends AbstractParamInitializer { } private LayerConfiguration underlying(LayerConfiguration layer){ - while (layer instanceof BaseWrapperLayer) { - layer = ((BaseWrapperLayer)layer).getUnderlying(); + while (layer instanceof BaseWrapperLayerConfiguration) { + layer = ((BaseWrapperLayerConfiguration)layer).getUnderlying(); } return layer; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java index 73a31b96b..5d68bd890 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java @@ -42,7 +42,7 @@ import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.dropout.IDropout; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.LayerValidation; @@ -146,7 +146,7 @@ public class FineTuneConfiguration { if (layerConfiguration != null) { //As per NeuralNetConfiguration.configureLayer and LayerValidation.configureBaseLayer: only copy dropout to base layers // this excludes things like subsampling and activation layers - if (dropout != null && layerConfiguration instanceof BaseLayer) { + if (dropout != null && layerConfiguration instanceof BaseLayerConfiguration) { IDropout d = dropout.orElse(null); if (d != null) { d = d.clone(); //Clone to avoid shared state between layers @@ -158,8 +158,8 @@ public class FineTuneConfiguration { } } - 
if (layerConfiguration != null && layerConfiguration instanceof BaseLayer) { - BaseLayer bl = (BaseLayer) layerConfiguration; + if (layerConfiguration != null && layerConfiguration instanceof BaseLayerConfiguration) { + BaseLayerConfiguration bl = (BaseLayerConfiguration) layerConfiguration; if (activationFn != null) { bl.setActivationFn(activationFn); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java index 8cc50854b..663420f0a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java @@ -322,7 +322,7 @@ public class TransferLearning { if (numParams > 0) { params = Nd4j.create(origModel.getNetConfiguration().getDataType(), 1, numParams); org.deeplearning4j.nn.api.Layer someLayer = layerConf.instantiate(layerConf.getNetConfiguration(), null, 0, params, true, dataType); - appendParams.add(someLayer.params()); + appendParams.add(someLayer.getParams()); appendConfs.add(someLayer.getLayerConfiguration()); } else { appendConfs.add(layerConf); @@ -400,9 +400,9 @@ public class TransferLearning { for (int i = 0; i < origModel.getnLayers(); i++) { if (origModel.getLayer(i).numParams() > 0) { //dup only if params are there - editedParams.add(origModel.getLayer(i).params().dup()); + editedParams.add(origModel.getLayer(i).getParams().dup()); } else { - editedParams.add(origModel.getLayer(i).params()); + editedParams.add(origModel.getLayer(i).getParams()); } } //apply changes to nout/nin if any in sorted order and save to editedParams @@ -467,7 +467,7 @@ public class TransferLearning { long numParams = layerImpl.initializer().numParams(layerConf); INDArray params = Nd4j.create(origModel.getNetConfiguration().getDataType(), 1, numParams); org.deeplearning4j.nn.api.Layer someLayer = layerImpl.instantiate(layerConf.getNetConfiguration(), null, 0, params, true, dataType); - editedParams.set(layerNum, someLayer.params()); + editedParams.set(layerNum, someLayer.getParams()); } @@ -485,7 +485,7 @@ public class TransferLearning { long numParams = layerImpl.initializer().numParams(layerConf); INDArray params = Nd4j.create(origModel.getNetConfiguration().getDataType(), 1, numParams); org.deeplearning4j.nn.api.Layer someLayer = layerImpl.instantiate(layerConf.getNetConfiguration(), null, 0, params, true, dataType); - editedParams.set(layerNum, someLayer.params()); + editedParams.set(layerNum, someLayer.getParams()); if (layerNum + 1 < editedConfs.size()) { layerConf = editedConfs.get(layerNum + 1); @@ -498,7 +498,7 @@ public class TransferLearning { if (numParams > 0) { params = Nd4j.create(origModel.getNetConfiguration().getDataType(), 1, numParams); someLayer = layerImpl.instantiate(layerConf.getNetConfiguration(), null, 0, params, true, dataType); - editedParams.set(layerNum + 1, someLayer.params()); + editedParams.set(layerNum + 1, someLayer.getParams()); } } } @@ -979,11 +979,11 @@ public class TransferLearning { continue; //some layers have no params if (editedVertices.contains(layerName)) continue; //keep the changed params - INDArray origParams = origGraph.getLayer(layerName).params(); + INDArray origParams = origGraph.getLayer(layerName).getParams(); layer.setParams(origParams.dup()); //copy over origGraph params } } else { - newGraph.setParams(origGraph.params()); + 
newGraph.setParams(origGraph.getModelParams()); } //Freeze layers as necessary. Note: we can't simply say "everything before frozen layer X needs to be frozen diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java index effc48ad4..bd6cc18a3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java @@ -295,7 +295,7 @@ public class TransferLearningHelper { unFrozenSubsetMLN.init(); //copy over params for (int i = frozenInputLayer + 1; i < origMLN.getnLayers(); i++) { - unFrozenSubsetMLN.getLayer(i - frozenInputLayer - 1).setParams(origMLN.getLayer(i).params()); + unFrozenSubsetMLN.getLayer(i - frozenInputLayer - 1).setParams(origMLN.getLayer(i).getParams()); } //unFrozenSubsetMLN.setListeners(origMLN.getListeners()); } @@ -413,7 +413,7 @@ public class TransferLearningHelper { for (GraphVertex aVertex : unFrozenSubsetGraph.getVertices()) { if (!aVertex.hasLayer()) continue; - origGraph.getVertex(aVertex.getVertexName()).getLayer().setParams(aVertex.getLayer().params()); + origGraph.getVertex(aVertex.getVertexName()).getLayer().setParams(aVertex.getLayer().getParams()); } } @@ -421,13 +421,13 @@ public class TransferLearningHelper { for (GraphVertex aVertex : unFrozenSubsetGraph.getVertices()) { if (!aVertex.hasLayer()) continue; - aVertex.getLayer().setParams(origGraph.getLayer(aVertex.getVertexName()).params()); + aVertex.getLayer().setParams(origGraph.getLayer(aVertex.getVertexName()).getParams()); } } private void copyParamsFromSubsetMLNToOrig() { for (int i = frozenInputLayer + 1; i < origMLN.getnLayers(); i++) { - origMLN.getLayer(i).setParams(unFrozenSubsetMLN.getLayer(i - frozenInputLayer - 1).params()); + origMLN.getLayer(i).setParams(unFrozenSubsetMLN.getLayer(i - frozenInputLayer - 1).getParams()); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java index dfcef372c..cec9da44a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.updater; import lombok.Getter; import net.brutex.ai.dnn.api.IModel; -import org.deeplearning4j.nn.api.Trainable; +import org.deeplearning4j.nn.api.ITrainableLayer; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -47,7 +47,7 @@ import java.util.*; public abstract class BaseMultiLayerUpdater implements Updater { protected final T network; - protected Map layersByName; + protected Map layersByName; protected final List updaterBlocks; protected INDArray updaterStateViewArray; protected boolean initializedMinibatchDivision; @@ -64,19 +64,19 @@ public abstract class BaseMultiLayerUpdater implements Updater */ public BaseMultiLayerUpdater(T network, INDArray updaterState) { this.network = network; - Trainable[] layers = getOrderedLayers(); //May also include vertices + ITrainableLayer[] layers = getOrderedLayers(); //May also include vertices int updaterStateSize = 0; //Iterate 
through layers, and variables for each layer. //While the updater configuration is the same: combine into one op, rather than doing a lot of smaller // (yet identical) ops. - Trainable lastLayer = null; + ITrainableLayer lastLayer = null; String lastVariable = null; UpdaterBlock currentBlock = null; updaterBlocks = new ArrayList<>(); - INDArray paramsView = network.params(); + INDArray paramsView = network.getModelParams(); INDArray gradientView = getFlattenedGradientsView(); int paramsViewSoFar = 0; int currentUpdaterOffset = 0; @@ -87,8 +87,8 @@ public abstract class BaseMultiLayerUpdater implements Updater for (int j = 0; j < variables.size(); j++) { String var = variables.get(j); long paramSizeThisVariable = layerParamTable.get(var).length(); - IUpdater u = layers[i].getConfig().getUpdaterByParam(var); - Preconditions.checkNotNull(u, "Updater for parameter %s, layer \"%s\" was null", var, layers[i].getConfig().getLayerName()); + IUpdater u = layers[i].getTrainingConfig().getUpdaterByParam(var); + Preconditions.checkNotNull(u, "Updater for parameter %s, layer \"%s\" was null", var, layers[i].getTrainingConfig().getLayerName()); int updaterStateSizeThisVariable = (int) u.stateSize(paramSizeThisVariable); INDArray gradientViewSubset = null; @@ -145,7 +145,7 @@ public abstract class BaseMultiLayerUpdater implements Updater updaterRequiresInit = false; } else if (updaterStateSize > 0) { //May be 0 if all SGD or NONE updaters, for example - updaterStateViewArray = Nd4j.createUninitialized(network.params().dataType(), new long[] {1, updaterStateSize}, Nd4j.order()); + updaterStateViewArray = Nd4j.createUninitialized(network.getModelParams().dataType(), new long[] {1, updaterStateSize}, Nd4j.order()); updaterRequiresInit = true; } @@ -183,7 +183,7 @@ public abstract class BaseMultiLayerUpdater implements Updater * @return Array of layers, in the correct order (i.e., same order as the parameter/gradient/updater flattening * order - input to output for MultiLayerNetwork, or topological order for ComputationGraph) */ - protected abstract Trainable[] getOrderedLayers(); + protected abstract ITrainableLayer[] getOrderedLayers(); /** * @return The flattened gradient view array for the model @@ -220,7 +220,7 @@ public abstract class BaseMultiLayerUpdater implements Updater } @Override - public void setStateViewArray(Trainable layer, INDArray viewArray, boolean initialize) { + public void setStateViewArray(ITrainableLayer layer, INDArray viewArray, boolean initialize) { this.setStateViewArray(viewArray); } @@ -241,7 +241,7 @@ public abstract class BaseMultiLayerUpdater implements Updater } @Override - public void update(Trainable layer, Gradient gradient, int iteration, int epoch, int batchSize, LayerWorkspaceMgr workspaceMgr) { + public void update(ITrainableLayer layer, Gradient gradient, int iteration, int epoch, int batchSize, LayerWorkspaceMgr workspaceMgr) { update(gradient, iteration, epoch, batchSize, workspaceMgr); } @@ -266,9 +266,9 @@ public abstract class BaseMultiLayerUpdater implements Updater //Split up the gradients on a per-layer basis, for pre-apply Map layerGradients = new HashMap<>(); - Trainable[] layers = getOrderedLayers(); + ITrainableLayer[] layers = getOrderedLayers(); if (layers.length == 1 && isSingleLayerUpdater()) { - layerGradients.put(layers[0].getConfig().getLayerName(), gradient); + layerGradients.put(layers[0].getTrainingConfig().getLayerName(), gradient); } else { for (Map.Entry gradientPair : gradient.gradientForVariable().entrySet()) { String key = 
gradientPair.getKey(); @@ -296,7 +296,7 @@ public abstract class BaseMultiLayerUpdater implements Updater //PRE apply (gradient clipping, etc): done on a per-layer basis for (Map.Entry entry : layerGradients.entrySet()) { String layerName = entry.getKey(); - Trainable layer = layersByName.get(layerName); + ITrainableLayer layer = layersByName.get(layerName); preApply(layer, layerGradients.get(layerName), iteration); } @@ -350,7 +350,7 @@ public abstract class BaseMultiLayerUpdater implements Updater long paramsSoFar = 0; long currentStart = 0; long currentEnd = 0; - for(Trainable t : getOrderedLayers()){ + for(ITrainableLayer t : getOrderedLayers()){ Set layerParams = t.getParamTable(false).keySet(); Map paramTable = t.getParamTable(false); for(String s : layerParams) { @@ -389,18 +389,18 @@ public abstract class BaseMultiLayerUpdater implements Updater * @param gradient Gradient to update * @param iteration The current iteration (i.e., number of parameter updates so far) */ - public void preApply(Trainable layer, Gradient gradient, int iteration) { + public void preApply(ITrainableLayer layer, Gradient gradient, int iteration) { - if (layer.getConfig() == null || layer.numParams() == 0) { + if (layer.getTrainingConfig() == null || layer.numParams() == 0) { //ILayer does not have parameters -> no gradient return; } - GradientNormalization normalization = layer.getConfig().getGradientNormalization(); + GradientNormalization normalization = layer.getTrainingConfig().getGradientNormalization(); if (normalization == null || normalization == GradientNormalization.None) return; //no op - final double threshold = layer.getConfig().getGradientNormalizationThreshold(); + final double threshold = layer.getTrainingConfig().getGradientNormalizationThreshold(); INDArray layerGradientView = layer.getGradientsViewArray(); switch (normalization) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java index f27e7dcfa..3dafbb3f9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.updater; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Trainable; +import org.deeplearning4j.nn.api.ITrainableLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.api.ndarray.INDArray; @@ -46,8 +46,8 @@ public class LayerUpdater extends BaseMultiLayerUpdater { } @Override - protected Trainable[] getOrderedLayers() { - return new Trainable[] {network}; + protected ITrainableLayer[] getOrderedLayers() { + return new ITrainableLayer[] {network}; } @Override @@ -57,7 +57,7 @@ public class LayerUpdater extends BaseMultiLayerUpdater { @Override protected INDArray getParams() { - return network.params(); + return network.getParams(); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java index f43aa85d2..1027f5003 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java @@ -22,8 +22,8 @@ package org.deeplearning4j.nn.updater; import 
lombok.Getter; import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.api.ITrainableLayer; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Trainable; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.api.ndarray.INDArray; @@ -49,9 +49,9 @@ public class MultiLayerUpdater extends BaseMultiLayerUpdater } @Override - protected Trainable[] getOrderedLayers() { + protected ITrainableLayer[] getOrderedLayers() { Layer[] layers = network.getLayers(); - Trainable[] t = new Trainable[layers.length]; + ITrainableLayer[] t = new ITrainableLayer[layers.length]; System.arraycopy(layers, 0, t, 0, layers.length); return t; } @@ -66,7 +66,7 @@ public class MultiLayerUpdater extends BaseMultiLayerUpdater @Override protected INDArray getParams() { - return network.params(); + return network.getModelParams(); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java index 3366a48f9..7b496468f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java @@ -22,18 +22,11 @@ package org.deeplearning4j.nn.updater; import lombok.AllArgsConstructor; import lombok.Data; -import lombok.val; -import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Trainable; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.BaseLayer; -import org.deeplearning4j.nn.layers.FrozenLayer; +import org.deeplearning4j.nn.api.ITrainableLayer; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.learning.GradientUpdater; import org.nd4j.linalg.learning.regularization.Regularization; -import org.nd4j.linalg.ops.transforms.Transforms; import java.util.ArrayList; import java.util.List; @@ -56,7 +49,7 @@ public class UpdaterBlock { @AllArgsConstructor @Data public static class ParamState { - private final Trainable layer; + private final ITrainableLayer layer; private final String paramName; private final int paramOffsetStart; private final int paramOffsetEnd; @@ -89,7 +82,7 @@ public class UpdaterBlock { if (gradientUpdater == null) { ParamState varState = layersAndVariablesInBlock.get(0); String varName = varState.getParamName(); - gradientUpdater = varState.getLayer().getConfig().getUpdaterByParam(varName).instantiate(updaterView, + gradientUpdater = varState.getLayer().getTrainingConfig().getUpdaterByParam(varName).instantiate(updaterView, updaterViewRequiresInitialization); //UpdaterUtils.getGradientUpdater(varState.getLayer(), varState.getParamName()); } } @@ -97,7 +90,7 @@ public class UpdaterBlock { public boolean isPretrainUpdaterBlock() { //All in block should be the same layer, and all be pretrain params ParamState vs = layersAndVariablesInBlock.get(0); - return vs.getLayer().getConfig().isPretrainParam(vs.getParamName()); + return vs.getLayer().getTrainingConfig().isPretrainParam(vs.getParamName()); } public boolean skipDueToPretrainConfig( boolean isLayerUpdater) { @@ -148,7 +141,7 @@ public class UpdaterBlock { //Second: apply learning rate policy. 
Note that by definition we have the same LR policy for every single // variable in the block - Trainable l0 = layersAndVariablesInBlock.get(0).getLayer(); + ITrainableLayer l0 = layersAndVariablesInBlock.get(0).getLayer(); if (l0.numParams() == 0) { //No params for this layer return; @@ -194,10 +187,10 @@ public class UpdaterBlock { * @param gradientView Gradient view array for the layer + param * @param paramsView Parameter view array for the layer + param */ - protected void applyRegularization(Regularization.ApplyStep step, Trainable layer, String paramName, INDArray gradientView, INDArray paramsView, int iter, int epoch, double lr) { + protected void applyRegularization(Regularization.ApplyStep step, ITrainableLayer layer, String paramName, INDArray gradientView, INDArray paramsView, int iter, int epoch, double lr) { //TODO: do this for multiple contiguous params/layers (fewer, larger ops) - List l = layer.getConfig().getRegularizationByParam(paramName); + List l = layer.getTrainingConfig().getRegularizationByParam(paramName); if(l != null && !l.isEmpty()){ for(Regularization r : l){ if(r.applyStep() == step){ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java index 14850eafb..11573daa0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java @@ -28,13 +28,18 @@ import org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater; /** * - * - * @author Adam Gibson + * Create an {@link org.deeplearning4j.nn.api.Updater} based on the provided {@link IModel}. */ public class UpdaterCreator { private UpdaterCreator() {} + /** + * Create an Updater for a given model type. This is either {@link ComputationGraphUpdater} or + * {@link MultiLayerUpdater} or a {@link LayerUpdater}. 
+ * @param layer + * @return + */ public static org.deeplearning4j.nn.api.Updater getUpdater(IModel layer) { if (layer instanceof MultiLayerNetwork) { return new MultiLayerUpdater((MultiLayerNetwork) layer); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java index 73bd5410e..14a2a54de 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterUtils.java @@ -20,16 +20,16 @@ package org.deeplearning4j.nn.updater; -import org.deeplearning4j.nn.api.Trainable; -import org.deeplearning4j.nn.api.TrainingConfig; +import org.deeplearning4j.nn.api.ITrainableLayer; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.nd4j.linalg.learning.config.IUpdater; public class UpdaterUtils { - public static boolean updaterConfigurationsEquals(Trainable layer1, String param1, Trainable layer2, String param2) { - TrainingConfig l1 = layer1.getConfig(); - TrainingConfig l2 = layer2.getConfig(); + public static boolean updaterConfigurationsEquals(ITrainableLayer layer1, String param1, ITrainableLayer layer2, String param2) { + ITraininableLayerConfiguration l1 = layer1.getTrainingConfig(); + ITraininableLayerConfiguration l2 = layer2.getTrainingConfig(); IUpdater u1 = l1.getUpdaterByParam(param1); IUpdater u2 = l2.getUpdaterByParam(param2); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java index 1c39f52e1..952258bcf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java @@ -20,7 +20,7 @@ package org.deeplearning4j.nn.updater.graph; -import org.deeplearning4j.nn.api.Trainable; +import org.deeplearning4j.nn.api.ITrainableLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.GraphVertex; import org.deeplearning4j.nn.updater.BaseMultiLayerUpdater; @@ -31,7 +31,7 @@ import java.util.HashMap; public class ComputationGraphUpdater extends BaseMultiLayerUpdater { - protected Trainable[] orderedLayers; + protected ITrainableLayer[] orderedLayers; public ComputationGraphUpdater(ComputationGraph graph) { this(graph, null); @@ -41,14 +41,14 @@ public class ComputationGraphUpdater extends BaseMultiLayerUpdater(); - Trainable[] layers = getOrderedLayers(); - for (Trainable l : layers) { - layersByName.put(l.getConfig().getLayerName(), l); + ITrainableLayer[] layers = getOrderedLayers(); + for (ITrainableLayer l : layers) { + layersByName.put(l.getTrainingConfig().getLayerName(), l); } } @Override - protected Trainable[] getOrderedLayers() { + protected ITrainableLayer[] getOrderedLayers() { if (orderedLayers != null) { return orderedLayers; } @@ -57,7 +57,7 @@ public class ComputationGraphUpdater extends BaseMultiLayerUpdater pair) { INDArray gradient = pair.getFirst().gradient(conf.netWideVariables()); - INDArray params = model.params().dup(); //Need dup here: params returns an array that isn't a copy (hence changes to this are problematic for line search methods) + INDArray params = model.getModelParams().dup(); //Need dup here: params returns an array that isn't a copy (hence 
changes to this are problematic for line search methods) searchState.put(GRADIENT_KEY, gradient); searchState.put(SCORE_KEY, pair.getSecond()); searchState.put(PARAMS_KEY, params); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java index 3a8fa9bdc..80a94c6e6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java @@ -71,7 +71,7 @@ public class LBFGS extends BaseOptimizer { @Override public void postStep(INDArray gradient) { INDArray previousParameters = (INDArray) searchState.get("oldparams"); - INDArray parameters = model.params(); + INDArray parameters = model.getModelParams(); INDArray previousGradient = (INDArray) searchState.get(GRADIENT_KEY); LinkedList rho = (LinkedList) searchState.get("rho"); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java index ee7070f01..e0de12fe9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java @@ -54,7 +54,7 @@ public class StochasticGradientDescent extends BaseOptimizer { log.info("Applying external updates before FF..."); // we'll just fire off params update process - accumulator.applyUpdate(stepFunction, model.params(), Nd4j.createUninitialized(model.params().shape(), model.params().ordering()), false); + accumulator.applyUpdate(stepFunction, model.getModelParams(), Nd4j.createUninitialized(model.getModelParams().shape(), model.getModelParams().ordering()), false); } } @@ -62,7 +62,7 @@ public class StochasticGradientDescent extends BaseOptimizer { Gradient gradient = pair.getFirst(); - INDArray params = model.params(); + INDArray params = model.getModelParams(); // if optimizer has GradientsAccumulator defined - go for it if (accumulator != null) { @@ -87,7 +87,7 @@ public class StochasticGradientDescent extends BaseOptimizer { // if there's no update available - just go on then } else { - // if accumulator isn't used - we just to for direct updates application + // if accumulator isn't used - we just go for direct updates application stepFunction.step(params, gradient.gradient()); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java index 7684caa6a..16e8a97e7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java @@ -172,7 +172,7 @@ public class EncodedGradientsAccumulator implements GradientsAccumulator, Regist public static long getOptimalBufferSize(IModel model, int numWorkers, int queueSize) { - return getOptimalBufferSize(model.params().length(), numWorkers, queueSize); + return getOptimalBufferSize(model.getModelParams().length(), numWorkers, queueSize); } @Override diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java index 56f7d3b7f..b2e10ece5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java @@ -204,7 +204,7 @@ public class CrashReportingUtil { StringBuilder sb = genericMemoryStatus(); int bytesPerElement; - switch (isMLN ? mln.params().dataType() : cg.params().dataType()){ + switch (isMLN ? mln.getModelParams().dataType() : cg.getModelParams().dataType()){ case DOUBLE: bytesPerElement = 8; break; @@ -260,7 +260,7 @@ public class CrashReportingUtil { } long sumMem = 0; - long nParams = net.params().length(); + long nParams = net.getModelParams().length(); sb.append("\n----- Network Information -----\n") .append(f("Network # Parameters", nParams)) .append(fBytes("Parameter Memory", bytesPerElement * nParams)); @@ -334,9 +334,9 @@ public class CrashReportingUtil { //Listener info: Collection listeners; if(isMLN){ - listeners = mln.getListeners(); + listeners = mln.getTrainingListeners(); } else { - listeners = cg.getListeners(); + listeners = cg.getTrainingListeners(); } sb.append("\n----- Network Training Listeners -----\n"); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java index e763d30bf..8649bcd19 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java @@ -152,10 +152,10 @@ public class ModelSerializer { ZipEntry coefficients = new ZipEntry(COEFFICIENTS_BIN); zipfile.putNextEntry(coefficients); DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zipfile)); - INDArray params = model.params(); + INDArray params = model.getModelParams(); if(params != null) { try { - Nd4j.write(model.params(), dos); + Nd4j.write(model.getModelParams(), dos); } finally { dos.flush(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java index 900a516cd..f19dd8a47 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java @@ -22,11 +22,11 @@ package org.deeplearning4j.util; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.IModel; -import org.deeplearning4j.nn.api.Trainable; +import org.deeplearning4j.nn.api.ITrainableLayer; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.GraphVertex; @@ -86,7 +86,7 @@ public class NetworkUtils { ComputationGraph cg = new ComputationGraph(conf); cg.init(); - cg.setParams(net.params()); + cg.setParams(net.getModelParams()); //Also copy across updater state: INDArray updaterState = net.getUpdater().getStateViewArray(); @@ -123,8 +123,8 @@ public class NetworkUtils { 
private static void setLearningRate(MultiLayerNetwork net, int layerNumber, double newLr, ISchedule newLrSchedule, boolean refreshUpdater) { LayerConfiguration l = net.getLayer(layerNumber).getLayerConfiguration(); - if (l instanceof BaseLayer) { - BaseLayer bl = (BaseLayer) l; + if (l instanceof BaseLayerConfiguration) { + BaseLayerConfiguration bl = (BaseLayerConfiguration) l; IUpdater u = bl.getIUpdater(); if (u != null && u.hasLearningRate()) { if (newLrSchedule != null) { @@ -205,8 +205,8 @@ public class NetworkUtils { LayerConfiguration l = net.getLayer(layerNumber).getLayerConfiguration(); int iter = net.getIterationCount(); int epoch = net.getEpochCount(); - if (l instanceof BaseLayer) { - BaseLayer bl = (BaseLayer) l; + if (l instanceof BaseLayerConfiguration) { + BaseLayerConfiguration bl = (BaseLayerConfiguration) l; IUpdater u = bl.getIUpdater(); if (u != null && u.hasLearningRate()) { double d = u.getLearningRate(iter, epoch); @@ -245,8 +245,8 @@ public class NetworkUtils { private static void setLearningRate(ComputationGraph net, String layerName, double newLr, ISchedule newLrSchedule, boolean refreshUpdater) { LayerConfiguration l = net.getLayer(layerName).getLayerConfiguration(); - if (l instanceof BaseLayer) { - BaseLayer bl = (BaseLayer) l; + if (l instanceof BaseLayerConfiguration) { + BaseLayerConfiguration bl = (BaseLayerConfiguration) l; IUpdater u = bl.getIUpdater(); if (u != null && u.hasLearningRate()) { if (newLrSchedule != null) { @@ -327,8 +327,8 @@ public class NetworkUtils { LayerConfiguration l = net.getLayer(layerName).getLayerConfiguration(); int iter = net.getComputationGraphConfiguration().getIterationCount(); int epoch = net.getComputationGraphConfiguration().getEpochCount(); - if (l instanceof BaseLayer) { - BaseLayer bl = (BaseLayer) l; + if (l instanceof BaseLayerConfiguration) { + BaseLayerConfiguration bl = (BaseLayerConfiguration) l; IUpdater u = bl.getIUpdater(); if (u != null && u.hasLearningRate()) { double d = u.getLearningRate(iter, epoch); @@ -499,7 +499,7 @@ public class NetworkUtils { } - private static int getId(Trainable trainable){ + private static int getId(ITrainableLayer trainable){ if(trainable instanceof GraphVertex){ GraphVertex gv = (GraphVertex)trainable; return gv.getVertexIndex(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java index 76f06b556..1829fbd40 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java @@ -20,6 +20,7 @@ package org.deeplearning4j.util; +import lombok.NonNull; import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer; @@ -148,7 +149,7 @@ public class OutputLayerUtil { return lf instanceof LossMCXENT || lf instanceof LossBinaryXENT; } - public static boolean activationExceedsZeroOneRange(IActivation activation, boolean isLossLayer){ + public static boolean activationExceedsZeroOneRange(@NonNull IActivation activation, boolean isLossLayer){ if(OUTSIDE_ZERO_ONE_RANGE.contains(activation.getClass())){ //Note: we're intentionally excluding identity here, for situations like dense(softmax) -> loss(identity) @@ -174,8 +175,8 @@ public class OutputLayerUtil { //Check that the activation function provides probabilities. 
This can't catch everything, but should catch a few // of the common mistakes users make - if(outputLayer instanceof BaseLayer){ - BaseLayer bl = (BaseLayer)outputLayer; + if(outputLayer instanceof BaseLayerConfiguration){ + BaseLayerConfiguration bl = (BaseLayerConfiguration)outputLayer; boolean isOutputLayer = outputLayer instanceof OutputLayer || outputLayer instanceof RnnOutputLayer || outputLayer instanceof CenterLossOutputLayer; if(activationExceedsZeroOneRange(bl.getActivationFn(), !isOutputLayer)){ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties b/cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties index 93090cbc4..51c081db4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties +++ b/cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties @@ -19,4 +19,7 @@ # # -org.slf4j.simpleLogger.defaultLogLevel = trace \ No newline at end of file +org.slf4j.simpleLogger.defaultLogLevel = debug + +org.slf4j.simpleLogger.log.org.deeplearning4j.optimize.listeners = info +org.slf4j.simplelogger.log.org.nd4j.linalg.dataset = info \ No newline at end of file diff --git a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java index 9ca79badc..0b7ce4627 100644 --- a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java +++ b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java @@ -32,21 +32,27 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ActivationLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInitXavier; +import org.deeplearning4j.optimize.listeners.ScoreToChartListener; import org.junit.jupiter.api.Test; import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.activations.impl.ActivationLReLU; +import org.nd4j.linalg.activations.impl.ActivationSigmoid; import org.nd4j.linalg.learning.config.Adam; +import org.nd4j.linalg.lossfunctions.LossFunctions; class dnnTest { @Test void testFFLayer() { - int numFeatures = 128; - int batchSize = 10; - int numRows = 1000; + int numFeatures = 6; + int batchSize = 5; + int numRows = 100; AtomicInteger cnt = new AtomicInteger(0); FloatsDataSetIterator iterator = new FloatsDataSetIterator(floatIterable(numRows, numFeatures), batchSize); @@ -55,40 +61,52 @@ class dnnTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().build(); /** - * NeuralNetConfiguration confxx = NeuralNetConfiguration.builder() - * .seed(42) - * .updater(UPDATER) - * .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) - * .gradientNormalizationThreshold(GRADIENT_THRESHOLD) - * .weightInit(WeightInit.XAVIER) - * .activation(Activation.IDENTITY) - * .list(genLayers()) - * .inputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) - * // .inputPreProcessor("CNN1", new FeedForwardToCnnPreProcessor(Y_DIM, X_DIM, CHANNELS)) - * .build(); + * NeuralNetConfiguration confxx = NeuralNetConfiguration.builder() .seed(42) .updater(UPDATER) + * .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) + * .gradientNormalizationThreshold(GRADIENT_THRESHOLD) 
.weightInit(WeightInit.XAVIER) + * .activation(Activation.IDENTITY) .list(genLayers()) .inputType(InputType.convolutional(X_DIM, + * Y_DIM, CHANNELS)) // .inputPreProcessor("CNN1", new FeedForwardToCnnPreProcessor(Y_DIM, + * X_DIM, CHANNELS)) .build(); */ /** - * new DenseLayer.Builder().nIn(INPUT).nOut(X_DIM*Y_DIM*CHANNELS).weightInit(WeightInit.NORMAL).build(), - * new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), - * new DenseLayer.Builder().nIn(X_DIM*Y_DIM*CHANNELS).nOut(X_DIM*Y_DIM).build(), - * new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), - * new DenseLayer.Builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM).build(), - * new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), - * new DenseLayer.Builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM*CHANNELS).activation(Activation.TANH) + * new + * DenseLayer.Builder().nIn(INPUT).nOut(X_DIM*Y_DIM*CHANNELS).weightInit(WeightInit.NORMAL).build(), + * new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), new + * DenseLayer.Builder().nIn(X_DIM*Y_DIM*CHANNELS).nOut(X_DIM*Y_DIM).build(), new + * ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), new + * DenseLayer.Builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM).build(), new + * ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), new + * DenseLayer.Builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM*CHANNELS).activation(Activation.TANH) */ - NN.net() - .seed(42) - .updater( Adam.builder().learningRate(0.0002).beta1(0.5).build() ) - .gradientNormalization( GradientNormalization.RenormalizeL2PerLayer) - .gradientNormalizationThreshold( 100 ) - .weightInitFn( new WeightInitXavier() ) - .activationFn( new ActivationIdentity() ) - .inputType( InputType.convolutional( 28, 28, 1)) - .layer( new DenseLayer.Builder().nIn(10).nOut(20).build() ) - .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build() ) - ; + NeuralNetConfiguration network = + NN.net() + .seed(42) + .updater(Adam.builder().learningRate(0.0002).beta1(0.5).build()) + .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) + .gradientNormalizationThreshold(100) + .weightInitFn(new WeightInitXavier()) + .activationFn(new ActivationSigmoid()) + // .inputType(InputType.convolutional(28, 28, 1)) + .layer(new DenseLayer.Builder().nIn(6).nOut(20).build()) + .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) + .layer(new DenseLayer.Builder().nIn(20).nOut(40).build()) + .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) + .layer(new DenseLayer.Builder().nIn(40).nOut(12).build()) + .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) + .layer(new DenseLayer.Builder().nIn(12).nOut(8).build()) + .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS).activation(Activation.SIGMOID).nOut(6).build()) + .build(); + MultiLayerNetwork net = new MultiLayerNetwork(network); + net.addTrainingListeners(new ScoreToChartListener("dnnTest")); + FloatsDataSetIterator dset = new FloatsDataSetIterator(floatIterable(numRows, numFeatures), batchSize); + + for (int i = 0; i < 2000000; i++) { + net.fit(dset); + System.out.println("Score: " + net.getScore()); + } } protected static Iterable> floatIterable(final int totalRows, final int numColumns) { @@ -108,8 +126,8 @@ class dnnTest { float[] features = new float[numColumns]; float[] labels = new float[numColumns]; for (int i = 0; i < numColumns; i++) { - features[i] = (float) i; - labels[i] = RandomUtils.nextFloat(0, 5); 
+ features[i] = RandomUtils.nextFloat(0, 3); + labels[i] = (float) features[i] + 1; } return Pair.makePair(features, labels); } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainer.java b/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainer.java index 7af10085b..6808d4145 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainer.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainer.java @@ -58,7 +58,7 @@ public class ParameterServerTrainer extends DefaultTrainer { log.info("Sending parameters"); //send the updated params - parameterServerClient.pushNDArray(getModel().params()); + parameterServerClient.pushNDArray(getModel().getModelParams()); } @Override @@ -77,7 +77,7 @@ public class ParameterServerTrainer extends DefaultTrainer { log.info("About to send params in"); //send the updated params - parameterServerClient.pushNDArray(getModel().params()); + parameterServerClient.pushNDArray(getModel().getModelParams()); log.info("Sent params"); } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/EarlyStoppingParallelTrainer.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/EarlyStoppingParallelTrainer.java index 73261f155..25a364b36 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/EarlyStoppingParallelTrainer.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/EarlyStoppingParallelTrainer.java @@ -91,16 +91,16 @@ public class EarlyStoppingParallelTrainer implements IEarlySto // adjust UI listeners AveragingTrainingListener trainerListener = new AveragingTrainingListener(this); if (model instanceof MultiLayerNetwork) { - Collection listeners = ((MultiLayerNetwork) model).getListeners(); + Collection listeners = ((MultiLayerNetwork) model).getTrainingListeners(); Collection newListeners = new LinkedList<>(listeners); newListeners.add(trainerListener); - model.setListeners(newListeners.toArray(new TrainingListener[]{})); + model.addTrainingListeners(newListeners.toArray(new TrainingListener[]{})); } else if (model instanceof ComputationGraph) { - Collection listeners = ((ComputationGraph) model).getListeners(); + Collection listeners = ((ComputationGraph) model).getTrainingListeners(); Collection newListeners = new LinkedList<>(listeners); newListeners.add(trainerListener); - model.setListeners(newListeners.toArray(new TrainingListener[]{})); + model.addTrainingListeners(newListeners.toArray(new TrainingListener[]{})); } this.wrapper = new ParallelWrapper.Builder<>(model).workers(workers).prefetchBuffer(prefetchBuffer) @@ -327,7 +327,7 @@ public class EarlyStoppingParallelTrainer implements IEarlySto @Override public void iterationDone(IModel model, int iteration, int epoch) { //Check per-iteration termination conditions - double latestScore = model.score(); + double latestScore = model.getScore(); trainer.setLatestScore(latestScore); for (IterationTerminationCondition c : esConfig.getIterationTerminationConditions()) { if (c.terminate(latestScore)) { diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java 
b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java index 0c1515109..571002280 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java @@ -185,7 +185,7 @@ public class InplaceParallelInference extends ParallelInference { isMLN = sourceModel instanceof MultiLayerNetwork; // we clone params only if we're not on the same device - val params = rootDevice ? sourceModel.params() : sourceModel.params().unsafeDuplication(true); + val params = rootDevice ? sourceModel.getModelParams() : sourceModel.getModelParams().unsafeDuplication(true); // and moving it to specified device (only if NOT root if (!rootDevice) diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java index 9d2c76a23..242a9f731 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java @@ -462,7 +462,7 @@ public class ParallelInference { this.replicatedModel.init(); synchronized (locker) { - this.replicatedModel.setParams(protoModel.params().unsafeDuplication(true)); + this.replicatedModel.setParams(protoModel.getModelParams().unsafeDuplication(true)); Nd4j.getExecutioner().commit(); } @@ -476,7 +476,7 @@ public class ParallelInference { this.replicatedModel.init(); synchronized (locker) { - this.replicatedModel.setParams(protoModel.params().unsafeDuplication(true)); + this.replicatedModel.setParams(protoModel.getModelParams().unsafeDuplication(true)); Nd4j.getExecutioner().commit(); } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java index 5a880872d..921b9b49e 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java @@ -345,8 +345,8 @@ public class ParallelWrapper implements AutoCloseable { List params = new ArrayList<>(); for (int cnt = 0; cnt < workers && cnt < locker.get(); cnt++) { - params.add(zoo[cnt].getModel().params()); - score += zoo[cnt].getModel().score(); + params.add(zoo[cnt].getModel().getModelParams()); + score += zoo[cnt].getModel().getScore(); } Nd4j.averageAndPropagate(null, params); @@ -956,11 +956,11 @@ public class ParallelWrapper implements AutoCloseable { List modelListeners = null; if (model instanceof MultiLayerNetwork) { - modelListeners = new ArrayList<>(((MultiLayerNetwork) model).getListeners()); - model.setListeners(new TrainingListener[]{}); + modelListeners = new ArrayList<>(((MultiLayerNetwork) model).getTrainingListeners()); + model.addTrainingListeners(new TrainingListener[]{}); } else if (model instanceof ComputationGraph) { - modelListeners = new ArrayList<>(((ComputationGraph) model).getListeners()); - model.setListeners(new TrainingListener[]{}); + modelListeners = new ArrayList<>(((ComputationGraph) model).getTrainingListeners()); + model.addTrainingListeners(new TrainingListener[]{}); } if 
(modelListeners != null && !modelListeners.isEmpty()) { diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContext.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContext.java index 663cb148c..6bafcd4cd 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContext.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContext.java @@ -74,6 +74,6 @@ public class SymmetricTrainerContext implements TrainerContext { @Override public void finalizeTraining(IModel originalModel, IModel... models) { // we CAN avarage here, but for now we'll just push first model params to original model - originalModel.setParams(models[0].params()); + originalModel.setParams(models[0].getModelParams()); } } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java index 2a1cf4d4e..522b3548a 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java @@ -147,7 +147,7 @@ public class DefaultTrainer extends Thread implements Trainer { if (replicatedModel instanceof MultiLayerNetwork) { - replicatedModel.setParams(model.params().unsafeDuplication(true)); + replicatedModel.setParams(model.getModelParams().unsafeDuplication(true)); Updater updater = ((MultiLayerNetwork) model).getUpdater(); INDArray view = updater.getStateViewArray(); @@ -161,7 +161,7 @@ public class DefaultTrainer extends Thread implements Trainer { updater.setStateViewArray((MultiLayerNetwork) replicatedModel, viewD, false); } } else if (replicatedModel instanceof ComputationGraph) { - replicatedModel.setParams(model.params().unsafeDuplication(true)); + replicatedModel.setParams(model.getModelParams().unsafeDuplication(true)); ComputationGraphUpdater updater = ((ComputationGraph) model).getUpdater(); INDArray view = updater.getStateViewArray(); @@ -278,7 +278,7 @@ public class DefaultTrainer extends Thread implements Trainer { } configureListeners(uuid, oldListeners, replicatedListeners); - this.replicatedModel.setListeners(replicatedListeners.toArray(new TrainingListener[]{})); + this.replicatedModel.addTrainingListeners(replicatedListeners.toArray(new TrainingListener[]{})); } @Override @@ -305,7 +305,7 @@ public class DefaultTrainer extends Thread implements Trainer { // we replicate original model params & updater state, just in case it's pre-trained model try { modelLock.writeLock().lock(); - replicatedModel.setParams(originalModel.params().unsafeDuplication(true)); + replicatedModel.setParams(originalModel.getModelParams().unsafeDuplication(true)); Updater updaterReplica = ((MultiLayerNetwork) replicatedModel).getUpdater(); Updater updaterOrigina = ((MultiLayerNetwork) originalModel).getUpdater(); @@ -338,7 +338,7 @@ public class DefaultTrainer extends Thread implements Trainer { // we replicate original model params & updater state, just in case it's pre-trained model try { modelLock.writeLock().lock(); - replicatedModel.setParams(originalModel.params().unsafeDuplication(true)); + 
replicatedModel.setParams(originalModel.getModelParams().unsafeDuplication(true)); ComputationGraphUpdater updaterReplica = ((ComputationGraph) replicatedModel).getUpdater(); ComputationGraphUpdater updaterOrigina = ((ComputationGraph) originalModel).getUpdater(); @@ -389,7 +389,7 @@ public class DefaultTrainer extends Thread implements Trainer { Nd4j.getExecutioner().commit(); // we ensure memory is updated on host side - Nd4j.getAffinityManager().ensureLocation(replicatedModel.params(), + Nd4j.getAffinityManager().ensureLocation(replicatedModel.getModelParams(), AffinityManager.Location.HOST); if (replicatedModel instanceof MultiLayerNetwork) { @@ -427,7 +427,7 @@ public class DefaultTrainer extends Thread implements Trainer { Nd4j.getExecutioner().commit(); // we ensure memory is updated on host side - Nd4j.getAffinityManager().ensureLocation(replicatedModel.params(), + Nd4j.getAffinityManager().ensureLocation(replicatedModel.getModelParams(), AffinityManager.Location.HOST); ComputationGraphUpdater updaterReplica = ((ComputationGraph) replicatedModel).getUpdater(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java index e64e6d06f..d952ebf4a 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java @@ -65,7 +65,7 @@ public class InplaceParallelInferenceTest extends BaseDL4JTest { for (val m : models) { assertNotNull(m); - assertEquals(net.params(), m.params()); + assertEquals(net.getModelParams(), m.getModelParams()); } val conf2 = NeuralNetConfiguration.builder() @@ -80,7 +80,7 @@ public class InplaceParallelInferenceTest extends BaseDL4JTest { val net2 = new ComputationGraph(conf2); net2.init(); - assertNotEquals(net.params(), net2.params()); + assertNotEquals(net.getModelParams(), net2.getModelParams()); pi.updateModel(net2); @@ -90,7 +90,7 @@ public class InplaceParallelInferenceTest extends BaseDL4JTest { for (val m : models2) { assertNotNull(m); - assertEquals(net2.params(), m.params()); + assertEquals(net2.getModelParams(), m.getModelParams()); } } finally { pi.shutdown(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java index 5f1ac9a7a..cdf908911 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java @@ -790,7 +790,7 @@ public class ParallelInferenceTest extends BaseDL4JTest { // model can be null for some of the workers yet, due to race condition if (m != null) { Thread.sleep(500); - assertEquals( net.params(), m.params(), "Failed at model [" + cnt0 + "]"); + assertEquals( net.getModelParams(), m.getModelParams(), "Failed at model [" + cnt0 + "]"); passed = true; } cnt0++; @@ -818,7 +818,7 @@ public class ParallelInferenceTest extends BaseDL4JTest { cnt0 = 0; for (val m:modelsAfter) { assertNotNull( m, "Failed at model [" + cnt0 + "]"); - assertEquals( net2.params(), m.params(), "Failed at model [" + cnt0++ + "]"); + assertEquals( 
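For reference, a minimal sketch of the replication pattern used in DefaultTrainer above; protoModel and replica are hypothetical model instances of the same architecture.

    // Copy the source parameters into a per-thread replica. unsafeDuplication(true)
    // produces a detached copy, so the replica can be moved to a different device.
    replica.setParams(protoModel.getModelParams().unsafeDuplication(true)); // formerly protoModel.params()
    Nd4j.getExecutioner().commit(); // ensure the copy has completed before training starts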
net2.getModelParams(), m.getModelParams(), "Failed at model [" + cnt0++ + "]"); } inf.shutdown(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java index b74262dd2..471cafbfd 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java @@ -26,7 +26,6 @@ import org.deeplearning4j.datasets.iterator.EarlyTerminationDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -112,7 +111,7 @@ public class ParallelWrapperTest extends BaseDL4JTest { .build(); log.info("Train model...."); - model.setListeners(new ScoreIterationListener(100)); + model.addTrainingListeners(new ScoreIterationListener(100)); long timeX = System.currentTimeMillis(); // optionally you might want to use MultipleEpochsIterator instead of manually iterating/resetting over your iterator diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java index 799f2dfd7..4389d8f68 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java @@ -96,7 +96,7 @@ public class TestListeners extends BaseDL4JTest { model.init(); StatsStorage ss = new InMemoryStatsStorage(); - model.setListeners(new TestListener(), new StatsListener(ss)); + model.addTrainingListeners(new TestListener(), new StatsListener(ss)); testListenersForModel(model, null); @@ -119,7 +119,7 @@ public class TestListeners extends BaseDL4JTest { model.init(); StatsStorage ss = new InMemoryStatsStorage(); - model.setListeners(new TestListener(), new StatsListener(ss)); + model.addTrainingListeners(new TestListener(), new StatsListener(ss)); testListenersForModel(model, null); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java index a3b97339b..a003d99fb 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java @@ -107,7 +107,7 @@ public class TestParallelEarlyStopping extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(50, 600); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); @@ -140,7 +140,7 
@@ public class TestParallelEarlyStopping extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(1)); + net.addTrainingListeners(new ScoreIterationListener(1)); DataSetIterator irisIter = new IrisDataSetIterator(10, 150); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java index 9d8fe7c70..66a9b76c4 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java @@ -67,7 +67,7 @@ public class TestParallelEarlyStoppingUI extends BaseDL4JTest { // it's important that the UI can report results from parallel training // there's potential for StatsListener to fail if certain properties aren't set in the model StatsStorage statsStorage = new InMemoryStatsStorage(); - net.setListeners(new StatsListener(statsStorage)); + net.addTrainingListeners(new StatsListener(statsStorage)); uiServer.attach(statsStorage); DataSetIterator irisIter = new IrisDataSetIterator(50, 500); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java index 84c7cf753..4849ee142 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java @@ -370,7 +370,7 @@ public class SparkComputationGraph extends SparkListenable { */ public double calculateScore(JavaRDD data, boolean average, int minibatchSize) { JavaRDD> rdd = data.mapPartitions(new ScoreFlatMapFunctionCGDataSet(conf.toJson(), - sc.broadcast(network.params()), minibatchSize)); + sc.broadcast(network.getModelParams()), minibatchSize)); //Reduce to a single tuple, with example count + sum of scores Tuple2 countAndSumScores = rdd.reduce(new LongDoubleReduceFunction()); @@ -405,7 +405,7 @@ public class SparkComputationGraph extends SparkListenable { */ public double calculateScoreMultiDataSet(JavaRDD data, boolean average, int minibatchSize) { JavaRDD> rdd = data.mapPartitions(new ScoreFlatMapFunctionCGMultiDataSet(conf.toJson(), - sc.broadcast(network.params()), minibatchSize)); + sc.broadcast(network.getModelParams()), minibatchSize)); //Reduce to a single tuple, with example count + sum of scores Tuple2 countAndSumScores = rdd.reduce(new LongDoubleReduceFunction()); if (average) { @@ -476,7 +476,7 @@ public class SparkComputationGraph extends SparkListenable { */ public JavaDoubleRDD scoreExamplesMultiDataSet(JavaRDD data, boolean includeRegularizationTerms, int batchSize) { - return data.mapPartitionsToDouble(new ScoreExamplesFunction(sc.broadcast(network.params()), + return data.mapPartitionsToDouble(new ScoreExamplesFunction(sc.broadcast(network.getModelParams()), sc.broadcast(conf.toJson()), includeRegularizationTerms, batchSize)); } @@ -527,7 +527,7 @@ public class SparkComputationGraph extends 
SparkListenable { * @return Network output given the input, by key */ public JavaPairRDD feedForwardWithKey(JavaPairRDD featuresData, int batchSize) { - return featuresData.mapPartitionsToPair(new GraphFeedForwardWithKeyFunction(sc.broadcast(network.params()), + return featuresData.mapPartitionsToPair(new GraphFeedForwardWithKeyFunction(sc.broadcast(network.getModelParams()), sc.broadcast(conf.toJson()), batchSize)); } @@ -554,7 +554,7 @@ public class SparkComputationGraph extends SparkListenable { */ public JavaPairRDD scoreExamplesMultiDataSet(JavaPairRDD data, boolean includeRegularizationTerms, int batchSize) { - return data.mapPartitionsToPair(new ScoreExamplesWithKeyFunction(sc.broadcast(network.params()), + return data.mapPartitionsToPair(new ScoreExamplesWithKeyFunction(sc.broadcast(network.getModelParams()), sc.broadcast(conf.toJson()), includeRegularizationTerms, batchSize)); } @@ -820,7 +820,7 @@ public class SparkComputationGraph extends SparkListenable { */ public T[] doEvaluation(JavaRDD data, int evalNumWorkers, int evalBatchSize, T... emptyEvaluations) { IEvaluateFlatMapFunction evalFn = new IEvaluateFlatMapFunction<>(true, sc.broadcast(conf.toJson()), - SparkUtils.asByteArrayBroadcast(sc, network.params()), evalNumWorkers, evalBatchSize, emptyEvaluations); + SparkUtils.asByteArrayBroadcast(sc, network.getModelParams()), evalNumWorkers, evalBatchSize, emptyEvaluations); JavaRDD evaluations = data.mapPartitions(evalFn); return evaluations.treeAggregate(null, new IEvaluateAggregateFunction(), new IEvaluateAggregateFunction()); @@ -844,7 +844,7 @@ public class SparkComputationGraph extends SparkListenable { public T[] doEvaluationMDS(JavaRDD data, int evalNumWorkers, int evalBatchSize, T... emptyEvaluations) { Preconditions.checkArgument(evalNumWorkers > 0, "Invalid number of evaulation workers: require at least 1 - got %s", evalNumWorkers); IEvaluateMDSFlatMapFunction evalFn = new IEvaluateMDSFlatMapFunction<>(sc.broadcast(conf.toJson()), - SparkUtils.asByteArrayBroadcast(sc, network.params()), evalNumWorkers, evalBatchSize, emptyEvaluations); + SparkUtils.asByteArrayBroadcast(sc, network.getModelParams()), evalNumWorkers, evalBatchSize, emptyEvaluations); JavaRDD evaluations = data.mapPartitions(evalFn); return evaluations.treeAggregate(null, new IEvaluateAggregateFunction(), new IEvaluateAggregateFunction()); @@ -906,7 +906,7 @@ public class SparkComputationGraph extends SparkListenable { protected IEvaluation[] doEvaluation(JavaRDD data, int evalNumWorkers, int evalBatchSize, DataSetLoader loader, MultiDataSetLoader mdsLoader, IEvaluation... 
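A condensed sketch of the broadcast pattern repeated in the SparkComputationGraph hunks above; sc, conf and network are the fields of that class, referenced here only for illustration.

    import org.apache.spark.broadcast.Broadcast;
    import org.nd4j.linalg.api.ndarray.INDArray;

    // Broadcast the configuration and the flattened parameter vector once,
    // then let each executor rebuild the network for scoring/evaluation.
    Broadcast<String> jsonConf = sc.broadcast(conf.toJson());
    Broadcast<INDArray> params = sc.broadcast(network.getModelParams()); // formerly network.params()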
emptyEvaluations){ IEvaluateMDSPathsFlatMapFunction evalFn = new IEvaluateMDSPathsFlatMapFunction(sc.broadcast(conf.toJson()), - SparkUtils.asByteArrayBroadcast(sc, network.params()), evalNumWorkers, evalBatchSize, loader, mdsLoader, + SparkUtils.asByteArrayBroadcast(sc, network.getModelParams()), evalNumWorkers, evalBatchSize, loader, mdsLoader, BroadcastHadoopConfigHolder.get(sc), emptyEvaluations); Preconditions.checkArgument(evalNumWorkers > 0, "Invalid number of evaulation workers: require at least 1 - got %s", evalNumWorkers); JavaRDD evaluations = data.mapPartitions(evalFn); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java index 2a0c7b655..890c62c3d 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java @@ -430,7 +430,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { * @see MultiLayerNetwork#scoreExamples(DataSet, boolean) */ public JavaDoubleRDD scoreExamples(JavaRDD data, boolean includeRegularizationTerms, int batchSize) { - return data.mapPartitionsToDouble(new ScoreExamplesFunction(sc.broadcast(network.params()), + return data.mapPartitionsToDouble(new ScoreExamplesFunction(sc.broadcast(network.getModelParams()), sc.broadcast(conf.toJson()), includeRegularizationTerms, batchSize)); } @@ -466,7 +466,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { */ public JavaPairRDD scoreExamples(JavaPairRDD data, boolean includeRegularizationTerms, int batchSize) { - return data.mapPartitionsToPair(new ScoreExamplesWithKeyFunction(sc.broadcast(network.params()), + return data.mapPartitionsToPair(new ScoreExamplesWithKeyFunction(sc.broadcast(network.getModelParams()), sc.broadcast(conf.toJson()), includeRegularizationTerms, batchSize)); } @@ -494,7 +494,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { */ public JavaPairRDD feedForwardWithMaskAndKey(JavaPairRDD> featuresDataAndMask, int batchSize) { return featuresDataAndMask - .mapPartitionsToPair(new FeedForwardWithKeyFunction(sc.broadcast(network.params()), + .mapPartitionsToPair(new FeedForwardWithKeyFunction(sc.broadcast(network.getModelParams()), sc.broadcast(conf.toJson()), batchSize)); } @@ -708,7 +708,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { */ public T[] doEvaluation(JavaRDD data, int evalNumWorkers, int evalBatchSize, T... emptyEvaluations) { IEvaluateFlatMapFunction evalFn = new IEvaluateFlatMapFunction<>(false, sc.broadcast(conf.toJson()), - SparkUtils.asByteArrayBroadcast(sc, network.params()), evalNumWorkers, evalBatchSize, emptyEvaluations); + SparkUtils.asByteArrayBroadcast(sc, network.getModelParams()), evalNumWorkers, evalBatchSize, emptyEvaluations); JavaRDD evaluations = data.mapPartitions(evalFn); return evaluations.treeAggregate(null, new IEvaluateAggregateFunction(), new IEvaluationReduceFunction()); } @@ -771,7 +771,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { protected IEvaluation[] doEvaluation(JavaRDD data, int evalNumWorkers, int evalBatchSize, DataSetLoader loader, MultiDataSetLoader mdsLoader, IEvaluation... 
emptyEvaluations){ Configuration config = sc.hadoopConfiguration(); IEvaluateMDSPathsFlatMapFunction evalFn = new IEvaluateMDSPathsFlatMapFunction(sc.broadcast(conf.toJson()), - SparkUtils.asByteArrayBroadcast(sc, network.params()), evalNumWorkers, evalBatchSize, loader, mdsLoader, + SparkUtils.asByteArrayBroadcast(sc, network.getModelParams()), evalNumWorkers, evalBatchSize, loader, mdsLoader, BroadcastHadoopConfigHolder.get(sc), emptyEvaluations); Preconditions.checkArgument(evalNumWorkers > 0, "Invalid number of evaulation workers: require at least 1 - got %s", evalNumWorkers); JavaRDD evaluations = data.mapPartitions(evalFn); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java index d3fb3355f..38a7e5bd8 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java @@ -276,7 +276,7 @@ public class ParameterAveragingTrainingMaster @Override public ParameterAveragingTrainingWorker getWorkerInstance(SparkDl4jMultiLayer network) { NetBroadcastTuple tuple = new NetBroadcastTuple(network.getNetwork().getNetConfiguration(), - network.getNetwork().params(), network.getNetwork().getUpdater().getStateViewArray()); + network.getNetwork().getModelParams(), network.getNetwork().getUpdater().getStateViewArray()); if (collectTrainingStats) stats.logBroadcastStart(); @@ -293,7 +293,7 @@ public class ParameterAveragingTrainingMaster @Override public ParameterAveragingTrainingWorker getWorkerInstance(SparkComputationGraph graph) { NetBroadcastTuple tuple = new NetBroadcastTuple(graph.getNetwork().getComputationGraphConfiguration(), - graph.getNetwork().params(), graph.getNetwork().getUpdater().getStateViewArray()); + graph.getNetwork().getModelParams(), graph.getNetwork().getUpdater().getStateViewArray()); if (collectTrainingStats) stats.logBroadcastStart(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java index 2322ba5c2..3b3b9f9b3 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java @@ -172,9 +172,9 @@ public class ParameterAveragingTrainingWorker extends BaseTrainingWorker irisData = getIris(); @@ -130,7 +130,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { .lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(5)); + net.addTrainingListeners(new ScoreIterationListener(5)); JavaRDD irisData = getIris(); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); @@ -169,7 +169,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new 
MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(5)); + net.addTrainingListeners(new ScoreIterationListener(5)); JavaRDD irisData = getIris(); @@ -215,7 +215,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(5)); + net.addTrainingListeners(new ScoreIterationListener(5)); JavaRDD irisData = getIris(); @@ -252,7 +252,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.setListeners(new ScoreIterationListener(5)); + net.addTrainingListeners(new ScoreIterationListener(5)); JavaRDD irisData = getIris(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java index f0e1fefb1..1a196af4f 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java @@ -78,7 +78,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); - net.setListeners(new ScoreIterationListener(5)); + net.addTrainingListeners(new ScoreIterationListener(5)); JavaRDD irisData = getIris(); @@ -132,7 +132,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { .lossFunction(LossFunctions.LossFunction.MSE).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); - net.setListeners(new ScoreIterationListener(5)); + net.addTrainingListeners(new ScoreIterationListener(5)); JavaRDD irisData = getIris(); EarlyStoppingModelSaver saver = new InMemoryModelSaver<>(); @@ -173,7 +173,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); - net.setListeners(new ScoreIterationListener(5)); + net.addTrainingListeners(new ScoreIterationListener(5)); JavaRDD irisData = getIris(); @@ -221,7 +221,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); - net.setListeners(new ScoreIterationListener(5)); + net.addTrainingListeners(new ScoreIterationListener(5)); JavaRDD irisData = getIris(); @@ -260,7 +260,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); - net.setListeners(new ScoreIterationListener(5)); + net.addTrainingListeners(new ScoreIterationListener(5)); JavaRDD irisData = getIris(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/layer/CustomLayer.java 
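Sketch of the listener wiring after the rename; only the attach call changes, and the new name suggests listeners are appended rather than replaced (an assumption, not verified here).

    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.deeplearning4j.optimize.listeners.ScoreIterationListener;

    MultiLayerNetwork net = new MultiLayerNetwork(conf); // conf as built in the tests above
    net.init();
    net.addTrainingListeners(new ScoreIterationListener(5)); // formerly net.setListeners(...)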
b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/layer/CustomLayer.java index 3c3e2c46f..97f6a2c89 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/layer/CustomLayer.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/layer/CustomLayer.java @@ -56,7 +56,7 @@ public class CustomLayer extends FeedForwardLayer { boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); CustomLayerImpl ret = new CustomLayerImpl(lconf, networkDataType); - ret.setListeners(trainingListeners); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java index 109add55d..20727ed03 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java @@ -40,7 +40,6 @@ import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.optimize.listeners.ScoreIterationListener; import org.deeplearning4j.spark.BaseSparkTest; import org.deeplearning4j.spark.api.RDDTrainingApproach; @@ -272,9 +271,9 @@ public class TestSparkComputationGraph extends BaseSparkTest { sparkNet3.fit(rdd); - INDArray p1 = sparkNet1.getNetwork().params(); - INDArray p2 = sparkNet2.getNetwork().params(); - INDArray p3 = sparkNet3.getNetwork().params(); + INDArray p1 = sparkNet1.getNetwork().getModelParams(); + INDArray p2 = sparkNet2.getNetwork().getModelParams(); + INDArray p3 = sparkNet3.getNetwork().getModelParams(); sparkNet1.getTrainingMaster().deleteTempFiles(sc); sparkNet2.getTrainingMaster().deleteTempFiles(sc); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java index 6b22acca7..7a638199c 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java @@ -239,7 +239,7 @@ public class TestMiscFunctions extends BaseSparkTest { JavaPairRDD reconstr = rdd.mapPartitionsToPair(new VaeReconstructionProbWithKeyFunction( - sc.broadcast(net.params()), sc.broadcast(mlc.toJson()), true, 16, 128)); + sc.broadcast(net.getModelParams()), sc.broadcast(mlc.toJson()), true, 16, 128)); Map l = reconstr.collectAsMap(); @@ -282,7 +282,7 @@ public class TestMiscFunctions extends BaseSparkTest { JavaPairRDD reconstrErrors = rdd.mapPartitionsToPair(new 
VaeReconstructionErrorWithKeyFunction( - sc.broadcast(net.params()), sc.broadcast(mlc.toJson()), 16)); + sc.broadcast(net.getModelParams()), sc.broadcast(mlc.toJson()), 16)); Map l = reconstrErrors.collectAsMap(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java index 277c4a133..7ba980f62 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java @@ -191,7 +191,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, new RmsProp(0.5))); net.init(); - INDArray initialParams = net.params().dup(); + INDArray initialParams = net.getModelParams().dup(); for (int i = 0; i < seeds.length; i++) { DataSet ds = getOneDataSet(miniBatchSize, seeds[i]); @@ -199,13 +199,13 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { net.setUpdater(null); net.fit(ds); } - INDArray finalParams = net.params().dup(); + INDArray finalParams = net.getModelParams().dup(); //Do training on Spark with one executor, for 3 separate minibatches TrainingMaster tm = getTrainingMaster(1, miniBatchSize, saveUpdater); SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, new RmsProp(0.5)), tm); sparkNet.setCollectTrainingStats(true); - INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); + INDArray initialSparkParams = sparkNet.getNetwork().getModelParams().dup(); for (int i = 0; i < seeds.length; i++) { List list = getOneDataSetAsIndividalExamples(miniBatchSize, seeds[i]); @@ -214,7 +214,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { sparkNet.fit(rdd); } - INDArray finalSparkParams = sparkNet.getNetwork().params().dup(); + INDArray finalSparkParams = sparkNet.getNetwork().getModelParams().dup(); assertEquals(initialParams, initialSparkParams); assertNotEquals(initialParams, finalParams); @@ -245,7 +245,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { ComputationGraph net = new ComputationGraph(getGraphConf(12345, new RmsProp(0.5))); net.init(); - INDArray initialParams = net.params().dup(); + INDArray initialParams = net.getModelParams().dup(); for (int i = 0; i < seeds.length; i++) { DataSet ds = getOneDataSet(miniBatchSize, seeds[i]); @@ -253,14 +253,14 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { net.setUpdater(null); net.fit(ds); } - INDArray finalParams = net.params().dup(); + INDArray finalParams = net.getModelParams().dup(); //Do training on Spark with one executor, for 3 separate minibatches TrainingMaster tm = getTrainingMaster(1, miniBatchSize, saveUpdater); SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConf(12345, new RmsProp(0.5)), tm); sparkNet.setCollectTrainingStats(true); - INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); + INDArray initialSparkParams = sparkNet.getNetwork().getModelParams().dup(); for (int i = 0; i < seeds.length; i++) { List list = getOneDataSetAsIndividalExamples(miniBatchSize, seeds[i]); @@ -269,7 +269,7 @@ public class 
TestCompareParameterAveragingSparkVsSingleMachine { sparkNet.fit(rdd); } - INDArray finalSparkParams = sparkNet.getNetwork().params().dup(); + INDArray finalSparkParams = sparkNet.getNetwork().getModelParams().dup(); assertEquals(initialParams, initialSparkParams); assertNotEquals(initialParams, finalParams); @@ -304,7 +304,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, new Sgd(0.5))); net.init(); - INDArray initialParams = net.params().dup(); + INDArray initialParams = net.getModelParams().dup(); // executioner.addToWatchdog(initialParams, "initialParams"); @@ -314,7 +314,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { net.setUpdater(null); net.fit(ds); } - INDArray finalParams = net.params().dup(); + INDArray finalParams = net.getModelParams().dup(); //Do training on Spark with one executor, for 3 separate minibatches // TrainingMaster tm = getTrainingMaster(1, miniBatchSizePerWorker, saveUpdater); @@ -325,7 +325,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { .rddTrainingApproach(RDDTrainingApproach.Export).build(); SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, new Sgd(0.5)), tm); sparkNet.setCollectTrainingStats(true); - INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); + INDArray initialSparkParams = sparkNet.getNetwork().getModelParams().dup(); // executioner.addToWatchdog(initialSparkParams, "initialSparkParams"); @@ -339,7 +339,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { // System.out.println(sparkNet.getSparkTrainingStats().statsAsString()); sparkNet.getSparkTrainingStats().statsAsString(); - INDArray finalSparkParams = sparkNet.getNetwork().params().dup(); + INDArray finalSparkParams = sparkNet.getNetwork().getModelParams().dup(); // System.out.println("Initial (Local) params: " + Arrays.toString(initialParams.data().asFloat())); // System.out.println("Initial (Spark) params: " @@ -353,7 +353,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { double sparkScore = sparkNet.getScore(); assertTrue(sparkScore > 0.0); - assertEquals(net.score(), sparkScore, 1e-3); + assertEquals(net.getScore(), sparkScore, 1e-3); } finally { sc.stop(); } @@ -386,7 +386,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { MultiLayerNetwork net = new MultiLayerNetwork(getConfCNN(12345, new Sgd(0.5))); net.init(); - INDArray initialParams = net.params().dup(); + INDArray initialParams = net.getModelParams().dup(); for (int i = 0; i < seeds.length; i++) { DataSet ds = getOneDataSetCNN(miniBatchSizePerWorker * nWorkers, seeds[i]); @@ -394,7 +394,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { net.setUpdater(null); net.fit(ds); } - INDArray finalParams = net.params().dup(); + INDArray finalParams = net.getModelParams().dup(); //Do training on Spark with one executor, for 3 separate minibatches ParameterAveragingTrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(1) @@ -403,7 +403,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { .rddTrainingApproach(RDDTrainingApproach.Export).build(); SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConfCNN(12345, new Sgd(0.5)), tm); sparkNet.setCollectTrainingStats(true); - INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); + INDArray initialSparkParams = sparkNet.getNetwork().getModelParams().dup(); for (int i = 0; i < seeds.length; i++) { List list = @@ 
-416,7 +416,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { // System.out.println(sparkNet.getSparkTrainingStats().statsAsString()); sparkNet.getSparkTrainingStats().statsAsString(); - INDArray finalSparkParams = sparkNet.getNetwork().params().dup(); + INDArray finalSparkParams = sparkNet.getNetwork().getModelParams().dup(); // System.out.println("Initial (Local) params: " + Arrays.toString(initialParams.data().asFloat())); // System.out.println("Initial (Spark) params: " @@ -429,7 +429,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { double sparkScore = sparkNet.getScore(); assertTrue(sparkScore > 0.0); - assertEquals(net.score(), sparkScore, 1e-3); + assertEquals(net.getScore(), sparkScore, 1e-3); } finally { sc.stop(); } @@ -464,7 +464,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { ComputationGraph net = new ComputationGraph(getGraphConf(12345, new Sgd(0.5))); net.init(); - INDArray initialParams = net.params().dup(); + INDArray initialParams = net.getModelParams().dup(); // executioner.addToWatchdog(initialParams, "initialParams"); for (int i = 0; i < seeds.length; i++) { @@ -473,14 +473,14 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { net.setUpdater(null); net.fit(ds); } - INDArray finalParams = net.params().dup(); + INDArray finalParams = net.getModelParams().dup(); // executioner.addToWatchdog(finalParams, "finalParams"); //Do training on Spark with one executor, for 3 separate minibatches TrainingMaster tm = getTrainingMaster(1, miniBatchSizePerWorker, saveUpdater); SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConf(12345, new Sgd(0.5)), tm); sparkNet.setCollectTrainingStats(true); - INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); + INDArray initialSparkParams = sparkNet.getNetwork().getModelParams().dup(); // executioner.addToWatchdog(initialSparkParams, "initialSparkParams"); @@ -494,7 +494,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { // System.out.println(sparkNet.getSparkTrainingStats().statsAsString()); sparkNet.getSparkTrainingStats().statsAsString(); - INDArray finalSparkParams = sparkNet.getNetwork().params().dup(); + INDArray finalSparkParams = sparkNet.getNetwork().getModelParams().dup(); // executioner.addToWatchdog(finalSparkParams, "finalSparkParams"); float[] fp = finalParams.data().asFloat(); @@ -512,7 +512,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { double sparkScore = sparkNet.getScore(); assertTrue(sparkScore > 0.0); - assertEquals(net.score(), sparkScore, 1e-3); + assertEquals(net.getScore(), sparkScore, 1e-3); } finally { sc.stop(); } @@ -545,7 +545,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { ComputationGraph net = new ComputationGraph(getGraphConfCNN(12345, new Sgd(0.5))); net.init(); - INDArray initialParams = net.params().dup(); + INDArray initialParams = net.getModelParams().dup(); for (int i = 0; i < seeds.length; i++) { DataSet ds = getOneDataSetCNN(miniBatchSizePerWorker * nWorkers, seeds[i]); @@ -553,13 +553,13 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { net.setUpdater(null); net.fit(ds); } - INDArray finalParams = net.params().dup(); + INDArray finalParams = net.getModelParams().dup(); //Do training on Spark with one executor, for 3 separate minibatches TrainingMaster tm = getTrainingMaster(1, miniBatchSizePerWorker, saveUpdater); SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConfCNN(12345, new 
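Sketch of the before/after comparison idiom used in these tests; dup() snapshots the flattened parameter vector so later fitting cannot mutate the copy (net, ds and sparkNet are the locals from the test).

    INDArray before = net.getModelParams().dup(); // formerly net.params().dup()
    net.fit(ds);                                  // ds: a DataSet built by the test helpers
    INDArray after = net.getModelParams().dup();
    assertNotEquals(before, after);               // training must have changed the parameters
    assertEquals(net.getScore(), sparkNet.getScore(), 1e-3); // formerly net.score()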
Sgd(0.5)), tm); sparkNet.setCollectTrainingStats(true); - INDArray initialSparkParams = sparkNet.getNetwork().params().dup(); + INDArray initialSparkParams = sparkNet.getNetwork().getModelParams().dup(); for (int i = 0; i < seeds.length; i++) { List list = @@ -572,7 +572,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { // System.out.println(sparkNet.getSparkTrainingStats().statsAsString()); sparkNet.getSparkTrainingStats().statsAsString(); - INDArray finalSparkParams = sparkNet.getNetwork().params().dup(); + INDArray finalSparkParams = sparkNet.getNetwork().getModelParams().dup(); // System.out.println("Initial (Local) params: " + Arrays.toString(initialParams.data().asFloat())); // System.out.println("Initial (Spark) params: " + Arrays.toString(initialSparkParams.data().asFloat())); @@ -584,7 +584,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { double sparkScore = sparkNet.getScore(); assertTrue(sparkScore > 0.0); - assertEquals(net.score(), sparkScore, 1e-3); + assertEquals(net.getScore(), sparkScore, 1e-3); } finally { sc.stop(); } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java index 42fc1112c..e4a720a51 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java @@ -38,7 +38,7 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.BatchNormalization; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -191,11 +191,11 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { new ParameterAveragingTrainingMaster(true, numExecutors(), 1, 5, 1, 0)); MultiLayerNetwork networkCopy = sparkNetCopy.fit(data); - INDArray expectedParams = networkCopy.params(); + INDArray expectedParams = networkCopy.getModelParams(); SparkDl4jMultiLayer sparkNet = getBasicNetwork(); MultiLayerNetwork network = sparkNet.fit(data); - INDArray actualParams = network.params(); + INDArray actualParams = network.getModelParams(); assertEquals(expectedParams.size(1), actualParams.size(1)); } @@ -210,14 +210,14 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { MultiLayerNetwork netCopy = sparkNet.getNetwork().clone(); netCopy.fit(data); - IUpdater expectedUpdater = ((BaseLayer) netCopy.getLayerConfiguration()).getIUpdater(); - double expectedLR = ((Nesterovs)((BaseLayer) netCopy.getLayerConfiguration()).getIUpdater()).getLearningRate(); - double expectedMomentum = ((Nesterovs)((BaseLayer) netCopy.getLayerConfiguration()).getIUpdater()).getMomentum(); + IUpdater expectedUpdater = ((BaseLayerConfiguration) netCopy.getLayerConfiguration()).getIUpdater(); + double expectedLR = ((Nesterovs)((BaseLayerConfiguration) 
netCopy.getLayerConfiguration()).getIUpdater()).getLearningRate(); + double expectedMomentum = ((Nesterovs)((BaseLayerConfiguration) netCopy.getLayerConfiguration()).getIUpdater()).getMomentum(); - IUpdater actualUpdater = ((BaseLayer) sparkNet.getNetwork().getLayerConfiguration()).getIUpdater(); + IUpdater actualUpdater = ((BaseLayerConfiguration) sparkNet.getNetwork().getLayerConfiguration()).getIUpdater(); sparkNet.fit(sparkData); - double actualLR = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().getLayerConfiguration()).getIUpdater()).getLearningRate(); - double actualMomentum = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().getLayerConfiguration()).getIUpdater()).getMomentum(); + double actualLR = ((Nesterovs)((BaseLayerConfiguration) sparkNet.getNetwork().getLayerConfiguration()).getIUpdater()).getLearningRate(); + double actualMomentum = ((Nesterovs)((BaseLayerConfiguration) sparkNet.getNetwork().getLayerConfiguration()).getIUpdater()).getMomentum(); assertEquals(expectedUpdater, actualUpdater); assertEquals(expectedLR, actualLR, 0.01); @@ -474,11 +474,11 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { paths.add(path); } - INDArray paramsBefore = sparkNet.getNetwork().params().dup(); + INDArray paramsBefore = sparkNet.getNetwork().getModelParams().dup(); JavaRDD pathRdd = sc.parallelize(paths); sparkNet.fitPaths(pathRdd); - INDArray paramsAfter = sparkNet.getNetwork().params().dup(); + INDArray paramsAfter = sparkNet.getNetwork().getModelParams().dup(); assertNotEquals(paramsBefore, paramsAfter); SparkTrainingStats stats = sparkNet.getSparkTrainingStats(); @@ -545,11 +545,11 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { paths.add(path); } - INDArray paramsBefore = sparkNet.getNetwork().params().dup(); + INDArray paramsBefore = sparkNet.getNetwork().getModelParams().dup(); JavaRDD pathRdd = sc.parallelize(paths); sparkNet.fitPaths(pathRdd); - INDArray paramsAfter = sparkNet.getNetwork().params().dup(); + INDArray paramsAfter = sparkNet.getNetwork().getModelParams().dup(); assertNotEquals(paramsBefore, paramsAfter); Thread.sleep(200); @@ -635,11 +635,11 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { paths.add(path); } - INDArray paramsBefore = sparkNet.getNetwork().params().dup(); + INDArray paramsBefore = sparkNet.getNetwork().getModelParams().dup(); JavaRDD pathRdd = sc.parallelize(paths); sparkNet.fitPaths(pathRdd); - INDArray paramsAfter = sparkNet.getNetwork().params().dup(); + INDArray paramsAfter = sparkNet.getNetwork().getModelParams().dup(); assertNotEquals(paramsBefore, paramsAfter); SparkTrainingStats stats = sparkNet.getSparkTrainingStats(); @@ -657,11 +657,11 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { paths.add(path); } - paramsBefore = sparkNet.getNetwork().params().dup(); + paramsBefore = sparkNet.getNetwork().getModelParams().dup(); pathRdd = sc.parallelize(paths); sparkNet.fitPathsMultiDataSet(pathRdd); - paramsAfter = sparkNet.getNetwork().params().dup(); + paramsAfter = sparkNet.getNetwork().getModelParams().dup(); assertNotEquals(paramsBefore, paramsAfter); stats = sparkNet.getSparkTrainingStats(); @@ -731,9 +731,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { sparkNet3.fit(rdd); - INDArray p1 = sparkNet1.getNetwork().params(); - INDArray p2 = sparkNet2.getNetwork().params(); - INDArray p3 = sparkNet3.getNetwork().params(); + INDArray p1 = sparkNet1.getNetwork().getModelParams(); + INDArray p2 = 
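The BaseLayer to BaseLayerConfiguration rename above only changes the cast; a sketch of reading the updater hyperparameters afterwards, mirroring the test (the single getLayerConfiguration() call on the network is taken as-is from that test).

    import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration;
    import org.nd4j.linalg.learning.config.IUpdater;
    import org.nd4j.linalg.learning.config.Nesterovs;

    BaseLayerConfiguration layerConf = (BaseLayerConfiguration) net.getLayerConfiguration(); // cast was BaseLayer
    IUpdater updater = layerConf.getIUpdater();
    double lr = ((Nesterovs) updater).getLearningRate();
    double momentum = ((Nesterovs) updater).getMomentum();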
sparkNet2.getNetwork().getModelParams(); + INDArray p3 = sparkNet3.getNetwork().getModelParams(); sparkNet1.getTrainingMaster().deleteTempFiles(sc); sparkNet2.getTrainingMaster().deleteTempFiles(sc); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java index 5bb21442c..f26ae7e66 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java @@ -239,7 +239,7 @@ public class SharedTrainingWrapper { List listeners = worker.getListeners(); if(listeners != null){ - model.setListeners(listeners.toArray(new TrainingListener[]{})); + model.addTrainingListeners(listeners.toArray(new TrainingListener[]{})); StatsStorageRouter r = worker.getRouter(); if(r != null){ for(TrainingListener l : listeners){ @@ -319,7 +319,7 @@ public class SharedTrainingWrapper { consumer = UpdatesConsumer.builder() .numWorkers(numWorkers) .accumulator(accumulator) - .params(model.params()) + .params(model.getModelParams()) .build(); accumulator.setExternalSource(consumer.getUpdatesQueue()); @@ -382,7 +382,7 @@ public class SharedTrainingWrapper { // if we're going to extend iteratation for debugging purposes - let's do that here if (trainingConfiguration.getDebugLongerIterations() > 0) { log.warn("Adding SleepyListener: {} ms", trainingConfiguration.getDebugLongerIterations()); - model.addListeners(SleepyTrainingListener.builder() + model.addTrainingListeners(SleepyTrainingListener.builder() .timerIteration(trainingConfiguration.getDebugLongerIterations()).build()); } @@ -416,7 +416,7 @@ public class SharedTrainingWrapper { val mParams = modelParamsSupplier.get(); if (mParams != null) { log.info("Updating model params to the most recent ones..."); - originalModel.params().assign(mParams); + originalModel.getModelParams().assign(mParams); } // ok. 
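Sketch of the in-place parameter refresh from SharedTrainingWrapper above; modelParamsSupplier and originalModel are the fields used in that hunk.

    INDArray mParams = modelParamsSupplier.get();
    if (mParams != null) {
        // assign() copies into the existing parameter view instead of swapping the array,
        // so updater state and workspaces keep pointing at the same buffer.
        originalModel.getModelParams().assign(mParams); // formerly originalModel.params().assign(...)
    }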
attaching accumulator to model @@ -520,7 +520,7 @@ public class SharedTrainingWrapper { val taAveraged = mh.getAverageThresholdAlgorithm(); // FIXME: fill stats here - val result = SharedTrainingResult.builder().aggregationsCount(1).scoreSum(originalModel.score()) + val result = SharedTrainingResult.builder().aggregationsCount(1).scoreSum(originalModel.getScore()) .updaterStateArray(updaterState).listenerMetaData(new ArrayList<>()) .listenerStaticInfo(new ArrayList<>()).listenerUpdates(new ArrayList<>()) .minibatchesPerExecutor(Collections.singletonMap(SparkUtils.getSparkExecutorId(), iteratorDataSetCount.get().get())) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java index bb291c0b8..5d9dd9d33 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java @@ -263,7 +263,7 @@ public class SharedTrainingMaster extends BaseTrainingMaster T[] doEvaluation(MultiDataSetIterator iterator, - T... evaluations) { + T... evaluations) { return null; } @@ -394,9 +397,6 @@ public class BarnesHutTsne implements IModel { return null; } - @Override - public void addListeners(TrainingListener... listener) {//no op - } public Map getParamTable() { return null; @@ -417,7 +417,8 @@ public class BarnesHutTsne implements IModel { } @Override - public void clear() {} + public void clear() { + } @Override public void applyConstraints(int iteration, int epoch) { @@ -440,6 +441,7 @@ public class BarnesHutTsne implements IModel { /** * Symmetrize the value matrix + * * @param rowP * @param colP * @param valP @@ -454,7 +456,8 @@ public class BarnesHutTsne implements IModel { workspaceConfigurationExternal, workspaceExternal); - try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ { + try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ + { for (int n = 0; n < N; n++) { int begin = rowP.getInt(n); int end = rowP.getInt(n + 1); @@ -487,7 +490,7 @@ public class BarnesHutTsne implements IModel { for (int n = 0; n < N; n++) { for (int i = rowP.getInt(n); i < rowP.getInt(n + 1); i++) { boolean present = false; - for (int m = rowP.getInt(colP.getInt(i)); m < rowP.getInt(colP.getInt(i)+1); m++) { + for (int m = rowP.getInt(colP.getInt(i)); m < rowP.getInt(colP.getInt(i) + 1); m++) { if (colP.getInt(m) == n) { present = true; if (n <= colP.getInt(i)) { @@ -570,7 +573,7 @@ public class BarnesHutTsne implements IModel { * @param listeners */ - public void setListeners(Collection listeners) { + public void addTrainingListeners(Collection listeners) { } @@ -580,7 +583,7 @@ public class BarnesHutTsne implements IModel { * @param listeners */ @Override - public void setListeners(TrainingListener... listeners) { + public void addTrainingListeners(TrainingListener... 
listeners) { } @@ -615,7 +618,8 @@ public class BarnesHutTsne implements IModel { private INDArray staticData; - public Initializer() {} + public Initializer() { + } public Initializer(INDArray input) { this.staticData = input; @@ -654,7 +658,8 @@ public class BarnesHutTsne implements IModel { workspaceExternal); - try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ { + try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ + { x.divi(x.maxNumber()); @@ -697,6 +702,7 @@ public class BarnesHutTsne implements IModel { /** * An individual iteration + * * @param p the probabilities that certain points * are near each other * @param i the iteration (primarily for debugging purposes) @@ -705,7 +711,9 @@ public class BarnesHutTsne implements IModel { update(gradient().getGradientFor(Y_GRAD), Y_GRAD); } - static double sign_tsne(double x) { return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.0)); } + static double sign_tsne(double x) { + return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.0)); + } @Override @@ -717,7 +725,8 @@ public class BarnesHutTsne implements IModel { workspaceConfigurationExternal, workspaceExternal); - try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ { + try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ + { INDArray yGrads = gradient; if (gains == null) @@ -726,12 +735,11 @@ public class BarnesHutTsne implements IModel { //Nd4j.getExecutioner().exec(new BarnesHutGains(gains, gains, yGrads, yIncs)); // Copied from Reference for (int i = 0; i < yGrads.rows(); ++i) { - for (int j = 0; j < yGrads.columns(); ++j) { - if (sign_tsne(yGrads.getDouble(i,j)) == sign_tsne(yIncs.getDouble(i,j))) { - gains.putScalar(new int[]{i,j}, gains.getDouble(i,j)*0.8); - } - else { - gains.putScalar(new int[]{i,j}, gains.getDouble(i,j)+0.2); + for (int j = 0; j < yGrads.columns(); ++j) { + if (sign_tsne(yGrads.getDouble(i, j)) == sign_tsne(yIncs.getDouble(i, j))) { + gains.putScalar(new int[]{i, j}, gains.getDouble(i, j) * 0.8); + } else { + gains.putScalar(new int[]{i, j}, gains.getDouble(i, j) + 0.2); } } } @@ -759,8 +767,9 @@ public class BarnesHutTsne implements IModel { /** * Save the model as a file with a csv format, adding the label as the last column. + * * @param labels - * @param path the path to write + * @param path the path to write * @throws IOException */ public void saveAsFile(List labels, String path) throws IOException { @@ -805,6 +814,7 @@ public class BarnesHutTsne implements IModel { write.flush(); } } + /** * Plot tsne * @@ -823,7 +833,7 @@ public class BarnesHutTsne implements IModel { @Override - public double score() { + public double getScore() { /*MemoryWorkspace workspace = workspaceMode == WorkspaceMode.NONE ? 
new DummyWorkspace() @@ -832,7 +842,8 @@ public class BarnesHutTsne implements IModel { workspaceExternal); - try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ { + try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ + { // Get estimate of normalization term @@ -871,7 +882,7 @@ public class BarnesHutTsne implements IModel { } @Override - public INDArray params() { + public INDArray getModelParams() { return null; } @@ -912,7 +923,7 @@ public class BarnesHutTsne implements IModel { } @Override - public void fit(INDArray data, LayerWorkspaceMgr workspaceMgr){ + public void fit(INDArray data, LayerWorkspaceMgr workspaceMgr) { fit(data); } @@ -937,7 +948,8 @@ public class BarnesHutTsne implements IModel { workspaceExternal); - try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ { + try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ + { if (yIncs == null) @@ -967,7 +979,7 @@ public class BarnesHutTsne implements IModel { @Override public Pair gradientAndScore() { - return new Pair<>(gradient(), score()); + return new Pair<>(gradient(), getScore()); } @Override @@ -1128,7 +1140,7 @@ public class BarnesHutTsne implements IModel { return this; } - public Builder workspaceMode(WorkspaceMode workspaceMode){ + public Builder workspaceMode(WorkspaceMode workspaceMode) { this.workspaceMode = workspaceMode; return this; } @@ -1143,7 +1155,7 @@ public class BarnesHutTsne implements IModel { @Override - public void close(){ + public void close() { //No-op } @@ -1153,7 +1165,34 @@ public class BarnesHutTsne implements IModel { * @return training listener */ @Override - public Collection getListeners() { + public Collection getTrainingListeners() { return null; } -} + + @Override + public ITraininableLayerConfiguration getTrainingConfig() { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public INDArray getParams() { + throw new RuntimeException("Not supported"); + + } + + /** + * DL4J layers typically produce the sum of the gradients during the backward pass for each layer, and if required + * (if minibatch=true) then divide by the minibatch size.
+ * However, there are some exceptions, such as the batch norm mean/variance estimate parameters: these "gradients" + * are actually not gradients, but are updates to be applied directly to the parameter vector. Put another way, + * most gradients should be divided by the minibatch to get the average; some "gradients" are actually final updates + * already, and should not be divided by the minibatch size. + * + * @param paramName Name of the parameter + * @return True if gradients should be divided by minibatch (most params); false otherwise (edge cases like batch norm mean/variance estimates) + */ + @Override + public boolean updaterDivideByMinibatch(String paramName) { + return false; + } +} \ No newline at end of file diff --git a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java index 7c5de3bbb..415cd7ac2 100644 --- a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java +++ b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java @@ -30,7 +30,6 @@ import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.Word2Vec; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -151,7 +150,7 @@ public class ManualTests { log.info("Train model...."); - model.setListeners(new ScoreIterationListener(listenerFreq), new ConvolutionalIterationListener(listenerFreq)); + model.addTrainingListeners(new ScoreIterationListener(listenerFreq), new ConvolutionalIterationListener(listenerFreq)); while (lfw.hasNext()) { lfwNext = lfw.next(); @@ -279,7 +278,7 @@ public class ManualTests { */ log.info("Train model...."); - model.setListeners(new ConvolutionalIterationListener(1)); + model.addTrainingListeners(new ConvolutionalIterationListener(1)); //((NativeOpExecutioner) Nd4j.getExecutioner()).getLoop().setOmpNumThreads(8); @@ -339,7 +338,7 @@ public class ManualTests { model.init(); log.info("Train model...."); - model.setListeners(new ConvolutionalIterationListener(1)); + model.addTrainingListeners(new ConvolutionalIterationListener(1)); for (int i = 0; i < nEpochs; i++) { model.fit(mnistTrain); diff --git a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java index e545ff53b..b53e55c9c 100644 --- a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java +++ b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java @@ -74,7 +74,7 @@ public class TestConvolutionalListener { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - net.setListeners(new ConvolutionalIterationListener(1), new ScoreIterationListener(1)); + net.addTrainingListeners(new ConvolutionalIterationListener(1), new ScoreIterationListener(1)); for (int i = 0; i < 10; i++) { net.fit(mnistTrain.next()); @@ -82,7 +82,7 @@ public class TestConvolutionalListener { } ComputationGraph cg = net.toComputationGraph(); - cg.setListeners(new ConvolutionalIterationListener(1), new ScoreIterationListener(1)); + 
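The Javadoc added above explains when gradients should be averaged over the minibatch; a sketch of a more typical override than the BarnesHutTsne no-op, assuming "mean" and "var" are the batch-norm running-estimate parameter keys (an assumption for illustration only).

    @Override
    public boolean updaterDivideByMinibatch(String paramName) {
        // Most parameters accumulate a sum of gradients and should be averaged over the minibatch;
        // batch-norm running mean/variance are direct updates and must not be divided.
        return !("mean".equals(paramName) || "var".equals(paramName));
    }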
diff --git a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java
index 7c5de3bbb..415cd7ac2 100644
--- a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java
+++ b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java
@@ -30,7 +30,6 @@ import org.deeplearning4j.models.word2vec.VocabWord;
 import org.deeplearning4j.models.word2vec.Word2Vec;
 import org.deeplearning4j.nn.conf.GradientNormalization;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
 import org.deeplearning4j.nn.conf.layers.DenseLayer;
@@ -151,7 +150,7 @@ public class ManualTests {

         log.info("Train model....");
-        model.setListeners(new ScoreIterationListener(listenerFreq), new ConvolutionalIterationListener(listenerFreq));
+        model.addTrainingListeners(new ScoreIterationListener(listenerFreq), new ConvolutionalIterationListener(listenerFreq));

         while (lfw.hasNext()) {
             lfwNext = lfw.next();
@@ -279,7 +278,7 @@ public class ManualTests {
          */

         log.info("Train model....");
-        model.setListeners(new ConvolutionalIterationListener(1));
+        model.addTrainingListeners(new ConvolutionalIterationListener(1));

         //((NativeOpExecutioner) Nd4j.getExecutioner()).getLoop().setOmpNumThreads(8);
@@ -339,7 +338,7 @@ public class ManualTests {
         model.init();

         log.info("Train model....");
-        model.setListeners(new ConvolutionalIterationListener(1));
+        model.addTrainingListeners(new ConvolutionalIterationListener(1));

         for (int i = 0; i < nEpochs; i++) {
             model.fit(mnistTrain);
diff --git a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java
index e545ff53b..b53e55c9c 100644
--- a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java
+++ b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java
@@ -74,7 +74,7 @@ public class TestConvolutionalListener {

         MultiLayerNetwork net = new MultiLayerNetwork(conf);
         net.init();
-        net.setListeners(new ConvolutionalIterationListener(1), new ScoreIterationListener(1));
+        net.addTrainingListeners(new ConvolutionalIterationListener(1), new ScoreIterationListener(1));

         for (int i = 0; i < 10; i++) {
             net.fit(mnistTrain.next());
@@ -82,7 +82,7 @@ public class TestConvolutionalListener {
         }

         ComputationGraph cg = net.toComputationGraph();
-        cg.setListeners(new ConvolutionalIterationListener(1), new ScoreIterationListener(1));
+        cg.addTrainingListeners(new ConvolutionalIterationListener(1), new ScoreIterationListener(1));
         for (int i = 0; i < 10; i++) {
             cg.fit(mnistTrain.next());
             Thread.sleep(1000);
diff --git a/cavis-ui/cavis-ui-model/src/main/java/org/deeplearning4j/ui/model/stats/BaseStatsListener.java b/cavis-ui/cavis-ui-model/src/main/java/org/deeplearning4j/ui/model/stats/BaseStatsListener.java
index 3797b6550..70144bfd3 100644
--- a/cavis-ui/cavis-ui-model/src/main/java/org/deeplearning4j/ui/model/stats/BaseStatsListener.java
+++ b/cavis-ui/cavis-ui-model/src/main/java/org/deeplearning4j/ui/model/stats/BaseStatsListener.java
@@ -29,7 +29,6 @@ import org.deeplearning4j.core.storage.StatsStorageRouter;
 import org.deeplearning4j.core.storage.StorageMetaData;
 import org.deeplearning4j.core.storage.listener.RoutingIterationListener;
 import org.deeplearning4j.nn.api.Layer;
-import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
 import org.deeplearning4j.nn.gradient.Gradient;
 import org.deeplearning4j.nn.graph.ComputationGraph;
@@ -419,7 +418,7 @@ public abstract class BaseStatsListener implements RoutingIterationListener {
         }

         //--- General ---
-        report.reportScore(model.score()); //Always report score
+        report.reportScore(model.getScore()); //Always report score

         if (updateConfig.collectLearningRates()) {
             Map lrs = new HashMap<>();
diff --git a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java
index 9b1a4801e..24e5e1ed9 100644
--- a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java
+++ b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java
@@ -64,9 +64,9 @@ public class TestStatsListener extends BaseDL4JTest {
             StatsStorage ss = new MapDBStatsStorage(); //in-memory

             if (useJ7) {
-                net.setListeners(new J7StatsListener(ss, 1));
+                net.addTrainingListeners(new J7StatsListener(ss, 1));
             } else {
-                net.setListeners(new StatsListener(ss, 1));
+                net.addTrainingListeners(new StatsListener(ss, 1));
             }
diff --git a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java
index d5b1a116b..1dc5cb1a6 100644
--- a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java
+++ b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java
@@ -56,7 +56,7 @@ public class TestTransferStatsCollection extends BaseDL4JTest {
                         new FineTuneConfiguration.Builder().updater(new Sgd(0.01)).build())
                 .setFeatureExtractor(0).build();

-        net2.setListeners(new StatsListener(new InMemoryStatsStorage()));
+        net2.addTrainingListeners(new StatsListener(new InMemoryStatsStorage()));

        //Previously: failed on frozen layers
        net2.fit(new DataSet(Nd4j.rand(8, 10), Nd4j.rand(8, 10)));
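The listener and score changes in the hunks above all follow the same two renames (setListeners becomes addTrainingListeners, score() becomes getScore()). A minimal call-site sketch, assuming only those renamed methods; the helper method and its use of ScoreIterationListener are invented for illustration:

// Call-site migration sketch; assumes the renamed methods shown in the hunks above.
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;

public class ListenerMigrationSketch {
    public static void attachAndReport(MultiLayerNetwork net) {
        // Previously: net.setListeners(new ScoreIterationListener(1));
        net.addTrainingListeners(new ScoreIterationListener(1));

        // Previously: double s = net.score();
        double s = net.getScore();
        System.out.println("Current score: " + s);
    }
}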
diff --git a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java
index 2ca083a4d..c0df01142 100644
--- a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java
+++ b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java
@@ -1174,8 +1174,8 @@ public class TrainModule implements UIModule {
                     layerInfoRows.add(new String[]{i18N.getMessage("train.model.layerinfotable.layerSize"),
                             String.valueOf(ffl.getNOut())});
                 }
-                if (layer instanceof BaseLayer) {
-                    BaseLayer bl = (BaseLayer) layer;
+                if (layer instanceof BaseLayerConfiguration) {
+                    BaseLayerConfiguration bl = (BaseLayerConfiguration) layer;
                     activationFn = bl.getActivationFn().toString();
                     long nParams = layer.initializer().numParams(bl.getLayer());
                     layerInfoRows.add(new String[]{i18N.getMessage("train.model.layerinfotable.layerNParams"),
diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java
index 8bae39055..09dbe9846 100644
--- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java
+++ b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java
@@ -142,7 +142,7 @@ public class TestRemoteReceiver extends BaseDL4JTest {
         MultiLayerNetwork net = new MultiLayerNetwork(conf);
         net.init();
         try(RemoteUIStatsStorageRouter ssr = new RemoteUIStatsStorageRouter("http://localhost:9000")) {
-            net.setListeners(new StatsListener(ssr), new ScoreIterationListener(1));
+            net.addTrainingListeners(new StatsListener(ssr), new ScoreIterationListener(1));

             DataSetIterator iter = new IrisDataSetIterator(150, 150);
diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java
index 694a557bc..d51f74aba 100644
--- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java
+++ b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java
@@ -112,7 +112,7 @@ public class TestVertxUI extends BaseDL4JTest {
         MultiLayerNetwork net = new MultiLayerNetwork(conf);
         net.init();
-        net.setListeners(new StatsListener(ss), new ScoreIterationListener(1));
+        net.addTrainingListeners(new StatsListener(ss), new ScoreIterationListener(1));

         DataSetIterator iter = new IrisDataSetIterator(150, 150);
@@ -142,7 +142,7 @@ public class TestVertxUI extends BaseDL4JTest {
         MultiLayerNetwork net = new MultiLayerNetwork(conf);
         net.init();
-        net.setListeners(new StatsListener(ss, 1), new ScoreIterationListener(1));
+        net.addTrainingListeners(new StatsListener(ss, 1), new ScoreIterationListener(1));

         DataSetIterator iter = new IrisDataSetIterator(150, 150);
@@ -171,7 +171,7 @@ public class TestVertxUI extends BaseDL4JTest {
         ComputationGraph net = new ComputationGraph(conf);
         net.init();
-        net.setListeners(new StatsListener(ss), new ScoreIterationListener(1));
+        net.addTrainingListeners(new StatsListener(ss), new ScoreIterationListener(1));

         DataSetIterator iter = new IrisDataSetIterator(150, 150);
@@ -195,7 +195,7 @@ public class TestVertxUI extends BaseDL4JTest {

         StatsStorage ss1 = new InMemoryStatsStorage();
-        net.setListeners(new StatsListener(ss1, 1, "ss1"));
+        net.addTrainingListeners(new StatsListener(ss1, 1, "ss1"));

         DataSetIterator iter = new IrisDataSetIterator(150, 150);
@@ -204,7 +204,7 @@ public class TestVertxUI extends BaseDL4JTest {
         }

         StatsStorage ss2 = new InMemoryStatsStorage();
-        net.setListeners(new StatsListener(ss2, 1, "ss2"));
+        net.addTrainingListeners(new StatsListener(ss2, 1, "ss2"));

         for (int i = 0; i < 4; i++) {
             net.fit(iter);
diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java
index bc1ae16a8..e17681c4c 100644
--- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java
+++ b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java
@@ -108,7 +108,7 @@ public class TestVertxUIManual extends BaseDL4JTest {
         MultiLayerNetwork net = new MultiLayerNetwork(conf);
         net.init();
-        net.setListeners(new StatsListener(ss), new ScoreIterationListener(1));
+        net.addTrainingListeners(new StatsListener(ss), new ScoreIterationListener(1));

         DataSetIterator iter = new IrisDataSetIterator(150, 150);
@@ -203,7 +203,7 @@ public class TestVertxUIManual extends BaseDL4JTest {
             StatsListener statsListener = new StatsListener(ss, 1);
             statsListener.setSessionID(sessionId);
-            net.setListeners(statsListener, new ScoreIterationListener(1));
+            net.addTrainingListeners(statsListener, new ScoreIterationListener(1));
             uIServer.attach(ss);

             DataSetIterator iter = new IrisDataSetIterator(150, 150);
diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java
index 7da17dafd..fb21f9561 100644
--- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java
+++ b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java
@@ -100,7 +100,7 @@ public class TestVertxUIMultiSession extends BaseDL4JTest {
             StatsListener statsListener = new StatsListener(ss, 1);
             statsListener.setSessionID(sessionId);
-            net.setListeners(statsListener, new ScoreIterationListener(1));
+            net.addTrainingListeners(statsListener, new ScoreIterationListener(1));
             uIServer.attach(ss);

             DataSetIterator iter = new IrisDataSetIterator(150, 150);
@@ -164,7 +164,7 @@ public class TestVertxUIMultiSession extends BaseDL4JTest {
         StatsListener statsListener = new StatsListener(ss, 1);
         statsListener.setSessionID(sessionId);
-        net.setListeners(statsListener, new ScoreIterationListener(1));
+        net.addTrainingListeners(statsListener, new ScoreIterationListener(1));
         uIServer.attach(ss);

         DataSetIterator iter = new IrisDataSetIterator(150, 150);
diff --git a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestImageNet.java b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestImageNet.java
index 0e6fdfb38..8d6b94d54 100644
--- a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestImageNet.java
+++ b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestImageNet.java
@@ -108,7 +108,7 @@ public class TestImageNet extends BaseDL4JTest {
         assertEquals("golden retriever", predictions.get(0).get(0).getLabel());

         // clean up for current model
-        initializedModel.params().close();
+        initializedModel.getModelParams().close();
         Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
         System.gc();
@@ -134,7 +134,7 @@ public class TestImageNet extends BaseDL4JTest {
         }

         // clean up for current model
-        initializedModel.params().close();
+        initializedModel.getModelParams().close();
         Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
         System.gc();
@@ -159,7 +159,7 @@ public class TestImageNet extends BaseDL4JTest {
             assertEquals("dog", classPrediction.getLabel());
         }

-        initializedModel.params().close();
+        initializedModel.getModelParams().close();
     }
 }
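The zoo-model hunks above and below replace the old params() accessor with getModelParams() wherever the flattened model parameters are needed; per-layer access uses getParams(), as in the GAN changes earlier in this patch. A small usage sketch, assuming only that rename; the comparison helper itself is invented for illustration:

// Parameter-accessor migration sketch; assumes getModelParams() as shown in the hunks above.
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;

public class ParamAccessSketch {
    /** Returns true if both networks hold identical flattened parameter vectors. */
    public static boolean sameParameters(MultiLayerNetwork a, MultiLayerNetwork b) {
        // Previously: a.params().equals(b.params())
        INDArray pa = a.getModelParams();
        INDArray pb = b.getModelParams();
        return pa.equals(pb);
    }
}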
diff --git a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java
index 2bf9e7ed1..27eb6e23d 100644
--- a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java
+++ b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java
@@ -201,7 +201,7 @@ public class TestInstantiation extends BaseDL4JTest {

         // clean up for current model
         Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
-        initializedModel.params().close();
+        initializedModel.getModelParams().close();
         for(INDArray arr : result){
             arr.close();
         }
@@ -271,7 +271,7 @@ public class TestInstantiation extends BaseDL4JTest {
         Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
         f.close();
         l.close();
-        initializedModel.params().close();
+        initializedModel.getModelParams().close();
         initializedModel.getFlattenedGradients().close();
         System.gc();
     }
diff --git a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java
index 7a26046cd..d759151e8 100644
--- a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java
+++ b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java
@@ -46,7 +46,7 @@ public class TestUtils {
             MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);

             assertEquals(net.getNetConfiguration(), restored.getNetConfiguration());
-            assertEquals(net.params(), restored.params());
+            assertEquals(net.getModelParams(), restored.getModelParams());

             return restored;
         } catch (IOException e){
@@ -66,7 +66,7 @@ public class TestUtils {
             ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais, true);

             assertEquals(net.getComputationGraphConfiguration(), restored.getComputationGraphConfiguration());
-            assertEquals(net.params(), restored.params());
+            assertEquals(net.getModelParams(), restored.getModelParams());

             return restored;
         } catch (IOException e){