diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java index aba07ef0d..0287d32a9 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java @@ -118,7 +118,7 @@ public class App { .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold(GRADIENT_THRESHOLD) //.weightInit(WeightInit.XAVIER) - .weightInitFn(new WeightInitXavier()) + .weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY) .layersFromArray(genLayers()) .inputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java index 3ae5d8bd0..680681920 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java @@ -74,7 +74,7 @@ public class LayerBuilderTest extends BaseDL4JTest { checkSerialization(layer); assertEquals(act, layer.getActivationFn()); - assertEquals(weight.getWeightInitFunction(), layer.getWeightInitFn()); + assertEquals(weight.getWeightInitFunction(), layer.getWeightInit()); assertEquals(new Dropout(dropOut), layer.getIDropout()); assertEquals(updater, layer.getIUpdater()); assertEquals(gradNorm, layer.getGradientNormalization()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java index 28d17c150..7777475e6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java @@ -99,8 +99,8 @@ public class LayerConfigTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getWeightInitFn()); - assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getWeightInitFn()); + assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getWeightInit()); + assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getWeightInit()); assertEquals(1, ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getBiasInit(), 0.0); assertEquals(1, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getBiasInit(), 0.0); @@ -117,8 +117,8 @@ public class LayerConfigTest extends BaseDL4JTest { net = new MultiLayerNetwork(conf); net.init(); - assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getWeightInitFn()); - assertEquals(new WeightInitDistribution(overriddenDistribution), ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getWeightInitFn()); + assertEquals(new WeightInitDistribution(defaultDistribution), ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getWeightInit()); + assertEquals(new WeightInitDistribution(overriddenDistribution), ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getWeightInit()); 
assertEquals(1, ((BaseLayerConfiguration) conf.getConf(0).getLayer()).getBiasInit(), 0.0); assertEquals(0, ((BaseLayerConfiguration) conf.getConf(1).getLayer()).getBiasInit(), 0.0); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java index dae839a06..b813b2b5f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java @@ -185,7 +185,7 @@ public class LayerConfigValidationTest extends BaseDL4JTest { layerConf1 = (BaseLayerConfiguration) net.getLayer(1).getLayerConfiguration(); assertEquals(expectedAdamMeanDecay, ((Adam) layerConf1.getIUpdater()).getBeta1(), 1e-3); assertEquals(expectedAdamVarDecay, ((Adam) layerConf1.getIUpdater()).getBeta2(), 1e-3); - assertEquals(new WeightInitDistribution(expectedDist), layerConf1.getWeightInitFn()); + assertEquals(new WeightInitDistribution(expectedDist), layerConf1.getWeightInit()); assertNull(TestUtils.getL1Reg(layerConf1.getRegularization())); assertNull(TestUtils.getL2Reg(layerConf1.getRegularization())); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java index 6fe2cf15e..f8a2f173b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java @@ -157,7 +157,7 @@ public class SameDiffConv extends SameDiffLayer { public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { NeuralNetConfiguration clone = globalConfig.clone().build(); if (activation == null) { - activation = SameDiffLayerUtils.fromIActivation(clone.getActivationFn()); + activation = SameDiffLayerUtils.fromIActivation(clone.getActivation()); } if (cm == null) { cm = clone.getConvolutionMode(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java index d0a176d63..e1799443d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java @@ -119,7 +119,7 @@ public class SameDiffDense extends SameDiffLayer { public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { NeuralNetConfiguration clone = globalConfig.clone().build(); if(activation == null){ - activation = SameDiffLayerUtils.fromIActivation(clone.getActivationFn()); + activation = SameDiffLayerUtils.fromIActivation(clone.getActivation()); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java index a8bfd8d97..954e8ed18 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java @@ -141,9 +141,9 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { BaseLayerConfiguration bl0 = ((BaseLayerConfiguration) modelNow.getLayer("layer0").getLayerConfiguration()); BaseLayerConfiguration bl1 = ((BaseLayerConfiguration) modelNow.getLayer("layer1").getLayerConfiguration()); BaseLayerConfiguration bl3 = ((BaseLayerConfiguration) modelNow.getLayer("layer3").getLayerConfiguration()); - assertEquals(bl0.getWeightInitFn(), new WeightInitDistribution(new NormalDistribution(1, 1e-1))); - assertEquals(bl1.getWeightInitFn(), new WeightInitXavier()); - assertEquals(bl1.getWeightInitFn(), new WeightInitXavier()); + assertEquals(bl0.getWeightInit(), new WeightInitDistribution(new NormalDistribution(1, 1e-1))); + assertEquals(bl1.getWeightInit(), new WeightInitXavier()); + assertEquals(bl1.getWeightInit(), new WeightInitXavier()); ComputationGraph modelExpectedArch = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java index 88e8d5d01..7d10a3bc7 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java @@ -163,14 +163,14 @@ public class TransferLearningMLNTest extends BaseDL4JTest { BaseLayerConfiguration bl0 = ((BaseLayerConfiguration) modelNow.getNetConfiguration().getConf(0).getLayer()); BaseLayerConfiguration bl1 = ((BaseLayerConfiguration) modelNow.getNetConfiguration().getConf(1).getLayer()); BaseLayerConfiguration bl3 = ((BaseLayerConfiguration) modelNow.getNetConfiguration().getConf(3).getLayer()); - assertEquals(bl0.getWeightInitFn().getClass(), WeightInitXavier.class); + assertEquals(bl0.getWeightInit().getClass(), WeightInitXavier.class); try { - assertEquals(JsonMappers.getMapper().writeValueAsString(bl1.getWeightInitFn()), + assertEquals(JsonMappers.getMapper().writeValueAsString(bl1.getWeightInit()), JsonMappers.getMapper().writeValueAsString(new WeightInitDistribution(new NormalDistribution(1, 1e-1)))); } catch (JsonProcessingException e) { throw new RuntimeException(e); } - assertEquals(bl3.getWeightInitFn(), new WeightInitXavier()); + assertEquals(bl3.getWeightInit(), new WeightInitXavier()); //modelNow should have the same architecture as modelExpectedArch assertArrayEquals(modelExpectedArch.getModelParams().shape(), modelNow.getModelParams().shape()); @@ -506,13 +506,13 @@ public class TransferLearningMLNTest extends BaseDL4JTest { BaseLayerConfiguration l0 = (BaseLayerConfiguration) net.getLayer(0).getLayerConfiguration(); assertEquals(new Adam(1e-4), l0.getIUpdater()); assertEquals(Activation.TANH.getActivationFunction(), l0.getActivationFn()); - assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l0.getWeightInit()); assertEquals(0.1, TestUtils.getL1(l0), 1e-6); BaseLayerConfiguration l1 = (BaseLayerConfiguration) net.getLayer(1).getLayerConfiguration(); assertEquals(new Adam(1e-4), l1.getIUpdater()); assertEquals(Activation.HARDSIGMOID.getActivationFunction(), l1.getActivationFn()); - assertEquals(new 
WeightInitRelu(), l1.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l1.getWeightInit()); assertEquals(0.2, TestUtils.getL2(l1), 1e-6); assertEquals(BackpropType.Standard, conf.getBackpropType()); @@ -521,13 +521,13 @@ public class TransferLearningMLNTest extends BaseDL4JTest { l0 = (BaseLayerConfiguration) net2.getLayer(0).getLayerConfiguration(); assertEquals(new Adam(2e-2), l0.getIUpdater()); assertEquals(Activation.TANH.getActivationFunction(), l0.getActivationFn()); - assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l0.getWeightInit()); assertEquals(0.1, TestUtils.getL1(l0), 1e-6); l1 = (BaseLayerConfiguration) net2.getLayer(1).getLayerConfiguration(); assertEquals(new Adam(2e-2), l1.getIUpdater()); assertEquals(Activation.HARDSIGMOID.getActivationFunction(), l1.getActivationFn()); - assertEquals(new WeightInitRelu(), l1.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l1.getWeightInit()); assertEquals(0.2, TestUtils.getL2(l1), 1e-6); assertEquals(BackpropType.TruncatedBPTT, net2.getNetConfiguration().getBackpropType()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java index f92e34bf2..ce7d713dc 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java @@ -37,6 +37,7 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.nn.params.PretrainParamInitializer; import org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater; +import org.deeplearning4j.nn.weights.WeightInit; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; @@ -940,7 +941,9 @@ public class TestUpdaters extends BaseDL4JTest { List blocks; NeuralNetConfiguration conf = - NeuralNetConfiguration.builder().updater(new Adam(0.5)).list() + NeuralNetConfiguration.builder() + .updater(new Adam(0.5)) + .weightInit(WeightInit.NORMAL) .layer(0, new VariationalAutoencoder.Builder().nIn(8).nOut(12) .encoderLayerSizes(10, 11).decoderLayerSizes(13, 14).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java index 773ccbae8..a771e414b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java @@ -72,7 +72,7 @@ public class RegressionTest050 extends BaseDL4JTest { assertEquals("relu", l0.getActivationFn().toString()); assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new Nesterovs(0.15, 0.9), l0.getIUpdater()); assertEquals(0.15, ((Nesterovs)l0.getIUpdater()).getLearningRate(), 1e-6); @@ -81,7 +81,7 @@ public class RegressionTest050 extends BaseDL4JTest { assertTrue(l1.getLossFn() instanceof LossMCXENT); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); - assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l1.getWeightInit()); 
assertEquals(new Nesterovs(0.15, 0.9), l1.getIUpdater()); assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6); assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6); @@ -106,7 +106,7 @@ public class RegressionTest050 extends BaseDL4JTest { assertTrue(l0.getActivationFn() instanceof ActivationLReLU); assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); - assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInitFn()); + assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertEquals(new Dropout(0.6), l0.getIDropout()); @@ -118,7 +118,7 @@ public class RegressionTest050 extends BaseDL4JTest { assertTrue(l1.getLossFn() instanceof LossMSE); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); - assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInitFn()); + assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater()); assertEquals(0.15, ((RmsProp)l1.getIUpdater()).getLearningRate(), 1e-6); assertEquals(new Dropout(0.6), l1.getIDropout()); @@ -145,7 +145,7 @@ public class RegressionTest050 extends BaseDL4JTest { assertEquals("tanh", l0.getActivationFn().toString()); assertEquals(3, l0.getNIn()); assertEquals(3, l0.getNOut()); - assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertArrayEquals(new int[] {2, 2}, l0.getKernelSize()); @@ -165,7 +165,7 @@ public class RegressionTest050 extends BaseDL4JTest { assertTrue(l2.getLossFn() instanceof LossNegativeLogLikelihood); assertEquals(26 * 26 * 3, l2.getNIn()); assertEquals(5, l2.getNOut()); - assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java index c75c11d11..8d6dae94a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java @@ -74,7 +74,7 @@ public class RegressionTest060 extends BaseDL4JTest { assertEquals("relu", l0.getActivationFn().toString()); assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new Nesterovs(0.15, 0.9), l0.getIUpdater()); assertEquals(0.15, ((Nesterovs)l0.getIUpdater()).getLearningRate(), 1e-6); @@ -83,7 +83,7 @@ public class RegressionTest060 extends BaseDL4JTest { assertTrue(l1.getLossFn() instanceof LossMCXENT); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); - assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); 
+ assertEquals(new WeightInitXavier(), l1.getWeightInit()); assertEquals(new Nesterovs(0.15, 0.9), l1.getIUpdater()); assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6); assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6); @@ -108,7 +108,7 @@ public class RegressionTest060 extends BaseDL4JTest { assertTrue(l0.getActivationFn() instanceof ActivationLReLU); assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); - assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInitFn()); + assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertEquals(new Dropout(0.6), l0.getIDropout()); @@ -122,7 +122,7 @@ public class RegressionTest060 extends BaseDL4JTest { assertTrue(l1.getLossFn() instanceof LossMSE); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); - assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInitFn()); + assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater()); assertEquals(0.15, ((RmsProp)l1.getIUpdater()).getLearningRate(), 1e-6); assertEquals(new Dropout(0.6), l1.getIDropout()); @@ -151,7 +151,7 @@ public class RegressionTest060 extends BaseDL4JTest { assertEquals("tanh", l0.getActivationFn().toString()); assertEquals(3, l0.getNIn()); assertEquals(3, l0.getNOut()); - assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertArrayEquals(new int[] {2, 2}, l0.getKernelSize()); @@ -171,7 +171,7 @@ public class RegressionTest060 extends BaseDL4JTest { assertTrue(l2.getLossFn() instanceof LossNegativeLogLikelihood); //TODO assertEquals(26 * 26 * 3, l2.getNIn()); assertEquals(5, l2.getNOut()); - assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java index 63ea30e49..8589b7de2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java @@ -75,7 +75,7 @@ public class RegressionTest071 extends BaseDL4JTest { assertEquals("relu", l0.getActivationFn().toString()); assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new Nesterovs(0.15, 0.9), l0.getIUpdater()); assertEquals(0.15, ((Nesterovs)l0.getIUpdater()).getLearningRate(), 1e-6); @@ -84,7 +84,7 @@ public class RegressionTest071 extends BaseDL4JTest { assertTrue(l1.getLossFn() instanceof LossMCXENT); assertEquals(4, l1.getNIn()); assertEquals(5, 
l1.getNOut()); - assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l1.getWeightInit()); assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6); assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6); assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6); @@ -109,7 +109,7 @@ public class RegressionTest071 extends BaseDL4JTest { assertTrue(l0.getActivationFn() instanceof ActivationLReLU); assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); - assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInitFn()); + assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertEquals(new Dropout(0.6), l0.getIDropout()); @@ -123,7 +123,7 @@ public class RegressionTest071 extends BaseDL4JTest { assertTrue(l1.getLossFn() instanceof LossMSE); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); - assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInitFn()); + assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertEquals(new Dropout(0.6), l1.getIDropout()); @@ -152,7 +152,7 @@ public class RegressionTest071 extends BaseDL4JTest { assertEquals("tanh", l0.getActivationFn().toString()); assertEquals(3, l0.getNIn()); assertEquals(3, l0.getNOut()); - assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); assertArrayEquals(new int[] {2, 2}, l0.getKernelSize()); @@ -172,7 +172,7 @@ public class RegressionTest071 extends BaseDL4JTest { assertTrue(l2.getLossFn() instanceof LossNegativeLogLikelihood); //TODO assertEquals(26 * 26 * 3, l2.getNIn()); assertEquals(5, l2.getNOut()); - assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java index 010ac9733..90cb2c126 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java @@ -74,7 +74,7 @@ public class RegressionTest080 extends BaseDL4JTest { assertTrue(l0.getActivationFn() instanceof ActivationReLU); assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertTrue(l0.getIUpdater() instanceof Nesterovs); Nesterovs n = (Nesterovs) l0.getIUpdater(); assertEquals(0.9, n.getMomentum(), 1e-6); @@ -87,7 +87,7 @@ public class RegressionTest080 extends BaseDL4JTest { 
assertTrue(l1.getLossFn() instanceof LossMCXENT); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); - assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l1.getWeightInit()); assertTrue(l1.getIUpdater() instanceof Nesterovs); assertEquals(0.9, ((Nesterovs)l1.getIUpdater()).getMomentum(), 1e-6); assertEquals(0.15, ((Nesterovs)l1.getIUpdater()).getLearningRate(), 1e-6); @@ -113,7 +113,7 @@ public class RegressionTest080 extends BaseDL4JTest { assertTrue(l0.getActivationFn() instanceof ActivationLReLU); assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); - assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInitFn()); + assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertTrue(l0.getIUpdater() instanceof RmsProp); RmsProp r = (RmsProp) l0.getIUpdater(); assertEquals(0.96, r.getRmsDecay(), 1e-6); @@ -130,7 +130,7 @@ public class RegressionTest080 extends BaseDL4JTest { assertTrue(l1.getLossFn() instanceof LossMSE); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); - assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l1.getWeightInitFn()); + assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l1.getWeightInit()); assertTrue(l1.getIUpdater() instanceof RmsProp); r = (RmsProp) l1.getIUpdater(); assertEquals(0.96, r.getRmsDecay(), 1e-6); @@ -162,7 +162,7 @@ public class RegressionTest080 extends BaseDL4JTest { assertTrue(l0.getActivationFn() instanceof ActivationTanH); assertEquals(3, l0.getNIn()); assertEquals(3, l0.getNOut()); - assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l0.getWeightInit()); assertTrue(l0.getIUpdater() instanceof RmsProp); RmsProp r = (RmsProp) l0.getIUpdater(); assertEquals(0.96, r.getRmsDecay(), 1e-6); @@ -185,7 +185,7 @@ public class RegressionTest080 extends BaseDL4JTest { assertTrue(l2.getLossFn() instanceof LossNegativeLogLikelihood); assertEquals(26 * 26 * 3, l2.getNIn()); assertEquals(5, l2.getNOut()); - assertEquals(new WeightInitRelu(), l2.getWeightInitFn()); + assertEquals(new WeightInitRelu(), l2.getWeightInit()); assertTrue(l2.getIUpdater() instanceof RmsProp); r = (RmsProp) l2.getIUpdater(); assertEquals(0.96, r.getRmsDecay(), 1e-6); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java index 6b6558c48..6555c5eec 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java @@ -89,21 +89,21 @@ public class RegressionTest100a extends BaseDL4JTest { GravesLSTM l0 = (GravesLSTM) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(200, l0.getNOut()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new WeightDecay(0.001, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new RmsProp(0.1), l0.getIUpdater()); GravesLSTM l1 = (GravesLSTM) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(200, l1.getNOut()); - assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); + assertEquals(new WeightInitXavier(), 
l1.getWeightInit()); assertEquals(new WeightDecay(0.001, false), TestUtils.getWeightDecayReg(l1)); assertEquals(new RmsProp(0.1), l1.getIUpdater()); RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).getLayerConfiguration(); assertEquals(new ActivationSoftmax(), l2.getActivationFn()); assertEquals(77, l2.getNOut()); - assertEquals(new WeightInitXavier(), l2.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l2.getWeightInit()); assertEquals(new WeightDecay(0.001, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new RmsProp(0.1), l0.getIUpdater()); @@ -139,7 +139,7 @@ public class RegressionTest100a extends BaseDL4JTest { assertEquals(32, l0.getNOut()); assertArrayEquals(new int[]{256, 256}, l0.getEncoderLayerSizes()); assertArrayEquals(new int[]{256, 256}, l0.getDecoderLayerSizes()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new WeightDecay(1e-4, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new Adam(0.05), l0.getIUpdater()); @@ -175,7 +175,7 @@ public class RegressionTest100a extends BaseDL4JTest { assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); assertEquals(new ActivationIdentity(), cl.getActivationFn()); assertEquals(ConvolutionMode.Same, cl.getConvolutionMode()); - assertEquals(new WeightInitXavier(), cl.getWeightInitFn()); + assertEquals(new WeightInitXavier(), cl.getWeightInit()); assertArrayEquals(new int[]{1,1}, cl.getKernelSize()); assertArrayEquals(new int[]{1,1}, cl.getKernelSize()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java index 829fc8c2b..223b7be91 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java @@ -124,21 +124,21 @@ public class RegressionTest100b3 extends BaseDL4JTest { LSTM l0 = (LSTM) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(200, l0.getNOut()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new WeightDecay(0.0001, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new Adam(0.005), l0.getIUpdater()); LSTM l1 = (LSTM) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(200, l1.getNOut()); - assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l1.getWeightInit()); assertEquals(new WeightDecay(0.0001, false), TestUtils.getWeightDecayReg(l1)); assertEquals(new Adam(0.005), l1.getIUpdater()); RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).getLayerConfiguration(); assertEquals(new ActivationSoftmax(), l2.getActivationFn()); assertEquals(77, l2.getNOut()); - assertEquals(new WeightInitXavier(), l2.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l2.getWeightInit()); assertEquals(new WeightDecay(0.0001, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new Adam(0.005), l0.getIUpdater()); @@ -174,7 +174,7 @@ public class RegressionTest100b3 extends BaseDL4JTest { assertEquals(32, l0.getNOut()); assertArrayEquals(new int[]{256, 256}, l0.getEncoderLayerSizes()); assertArrayEquals(new int[]{256, 256}, l0.getDecoderLayerSizes()); - assertEquals(new 
WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new WeightDecay(1e-4, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new Adam(1e-3), l0.getIUpdater()); @@ -210,7 +210,7 @@ public class RegressionTest100b3 extends BaseDL4JTest { assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); assertEquals(new ActivationIdentity(), cl.getActivationFn()); assertEquals(ConvolutionMode.Same, cl.getConvolutionMode()); - assertEquals(new WeightInitXavier(), cl.getWeightInitFn()); + assertEquals(new WeightInitXavier(), cl.getWeightInit()); assertArrayEquals(new int[]{1,1}, cl.getKernelSize()); assertArrayEquals(new int[]{1,1}, cl.getKernelSize()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java index b1247b3c1..6cdede6bd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java @@ -142,21 +142,21 @@ public class RegressionTest100b4 extends BaseDL4JTest { LSTM l0 = (LSTM) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(200, l0.getNOut()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l0)); assertEquals(new Adam(0.005), l0.getIUpdater()); LSTM l1 = (LSTM) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(200, l1.getNOut()); - assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l1.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l1)); assertEquals(new Adam(0.005), l1.getIUpdater()); RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).getLayerConfiguration(); assertEquals(new ActivationSoftmax(), l2.getActivationFn()); assertEquals(77, l2.getNOut()); - assertEquals(new WeightInitXavier(), l2.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l2.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l2)); assertEquals(new Adam(0.005), l2.getIUpdater()); @@ -192,7 +192,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { assertEquals(32, l0.getNOut()); assertArrayEquals(new int[]{256, 256}, l0.getEncoderLayerSizes()); assertArrayEquals(new int[]{256, 256}, l0.getDecoderLayerSizes()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l0)); assertEquals(new Adam(1e-3), l0.getIUpdater()); @@ -229,7 +229,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); assertEquals(new ActivationIdentity(), cl.getActivationFn()); assertEquals(ConvolutionMode.Same, cl.getConvolutionMode()); - assertEquals(new WeightInitXavier(), cl.getWeightInitFn()); + assertEquals(new WeightInitXavier(), cl.getWeightInit()); assertArrayEquals(new int[]{1, 1}, cl.getKernelSize()); INDArray outExp; @@ -260,7 +260,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { ConvolutionLayer l0 = (ConvolutionLayer) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationReLU(), 
l0.getActivationFn()); assertEquals(4, l0.getNOut()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l0)); assertEquals(new Adam(0.005), l0.getIUpdater()); assertArrayEquals(new int[]{3, 3}, l0.getKernelSize()); @@ -271,7 +271,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { SeparableConvolution2D l1 = (SeparableConvolution2D) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationReLU(), l1.getActivationFn()); assertEquals(8, l1.getNOut()); - assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l1.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l1)); assertEquals(new Adam(0.005), l1.getIUpdater()); assertArrayEquals(new int[]{3, 3}, l1.getKernelSize()); @@ -297,7 +297,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { DepthwiseConvolution2D l5 = (DepthwiseConvolution2D) net.getLayer(5).getLayerConfiguration(); assertEquals(new ActivationReLU(), l5.getActivationFn()); assertEquals(16, l5.getNOut()); - assertEquals(new WeightInitXavier(), l5.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l5.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l5)); assertEquals(new Adam(0.005), l5.getIUpdater()); assertArrayEquals(new int[]{3, 3}, l5.getKernelSize()); @@ -318,7 +318,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { ConvolutionLayer l8 = (ConvolutionLayer) net.getLayer(8).getLayerConfiguration(); assertEquals(4, l8.getNOut()); - assertEquals(new WeightInitXavier(), l8.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l8.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l8)); assertEquals(new Adam(0.005), l8.getIUpdater()); assertArrayEquals(new int[]{4, 4}, l8.getKernelSize()); @@ -327,7 +327,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { assertArrayEquals(new int[]{0, 0}, l8.getPadding()); CnnLossLayer l9 = (CnnLossLayer) net.getLayer(9).getLayerConfiguration(); - assertEquals(new WeightInitXavier(), l9.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l9.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l9)); assertEquals(new Adam(0.005), l9.getIUpdater()); assertEquals(new LossMAE(), l9.getLossFn()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java index f00b9c437..c0ee3dca2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java @@ -124,21 +124,21 @@ public class RegressionTest100b6 extends BaseDL4JTest { LSTM l0 = (LSTM) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(200, l0.getNOut()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l0)); assertEquals(new Adam(0.005), l0.getIUpdater()); LSTM l1 = (LSTM) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(200, l1.getNOut()); - assertEquals(new WeightInitXavier(), 
l1.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l1.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l1)); assertEquals(new Adam(0.005), l1.getIUpdater()); RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).getLayerConfiguration(); assertEquals(new ActivationSoftmax(), l2.getActivationFn()); assertEquals(77, l2.getNOut()); - assertEquals(new WeightInitXavier(), l2.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l2.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l2)); assertEquals(new Adam(0.005), l2.getIUpdater()); @@ -174,7 +174,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { assertEquals(32, l0.getNOut()); assertArrayEquals(new int[]{256, 256}, l0.getEncoderLayerSizes()); assertArrayEquals(new int[]{256, 256}, l0.getDecoderLayerSizes()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l0)); assertEquals(new Adam(1e-3), l0.getIUpdater()); @@ -210,7 +210,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); assertEquals(new ActivationIdentity(), cl.getActivationFn()); assertEquals(ConvolutionMode.Same, cl.getConvolutionMode()); - assertEquals(new WeightInitXavier(), cl.getWeightInitFn()); + assertEquals(new WeightInitXavier(), cl.getWeightInit()); assertArrayEquals(new int[]{1, 1}, cl.getKernelSize()); INDArray outExp; @@ -240,7 +240,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { ConvolutionLayer l0 = (ConvolutionLayer) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationReLU(), l0.getActivationFn()); assertEquals(4, l0.getNOut()); - assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l0.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l0)); assertEquals(new Adam(0.005), l0.getIUpdater()); assertArrayEquals(new int[]{3, 3}, l0.getKernelSize()); @@ -251,7 +251,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { SeparableConvolution2D l1 = (SeparableConvolution2D) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationReLU(), l1.getActivationFn()); assertEquals(8, l1.getNOut()); - assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l1.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l1)); assertEquals(new Adam(0.005), l1.getIUpdater()); assertArrayEquals(new int[]{3, 3}, l1.getKernelSize()); @@ -277,7 +277,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { DepthwiseConvolution2D l5 = (DepthwiseConvolution2D) net.getLayer(5).getLayerConfiguration(); assertEquals(new ActivationReLU(), l5.getActivationFn()); assertEquals(16, l5.getNOut()); - assertEquals(new WeightInitXavier(), l5.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l5.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l5)); assertEquals(new Adam(0.005), l5.getIUpdater()); assertArrayEquals(new int[]{3, 3}, l5.getKernelSize()); @@ -298,7 +298,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { ConvolutionLayer l8 = (ConvolutionLayer) net.getLayer(8).getLayerConfiguration(); assertEquals(4, l8.getNOut()); - assertEquals(new WeightInitXavier(), l8.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l8.getWeightInit()); assertEquals(new L2Regularization(0.0001), 
TestUtils.getL2Reg(l8)); assertEquals(new Adam(0.005), l8.getIUpdater()); assertArrayEquals(new int[]{4, 4}, l8.getKernelSize()); @@ -307,7 +307,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { assertArrayEquals(new int[]{0, 0}, l8.getPadding()); CnnLossLayer l9 = (CnnLossLayer) net.getLayer(9).getLayerConfiguration(); - assertEquals(new WeightInitXavier(), l9.getWeightInitFn()); + assertEquals(new WeightInitXavier(), l9.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l9)); assertEquals(new Adam(0.005), l9.getIUpdater()); assertEquals(new LossMAE(), l9.getLossFn()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasInitilizationTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasInitilizationTest.java index e97a1685e..eec8658cc 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasInitilizationTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasInitilizationTest.java @@ -167,7 +167,7 @@ public class KerasInitilizationTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); DenseLayer layer = new KerasDense(layerConfig, false).getDenseLayer(); - assertEquals(dl4jInitializer, layer.getWeightInitFn()); + assertEquals(dl4jInitializer, layer.getWeightInit()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasPReLUTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasPReLUTest.java index 202e06426..053eb1fab 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasPReLUTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasPReLUTest.java @@ -79,7 +79,7 @@ public class KerasPReLUTest extends BaseDL4JTest { PReLULayer layer = kerasPReLU.getPReLULayer(); assertArrayEquals(layer.getInputShape(), new long[] {3, 5, 4}); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(layerName, layer.getLayerName()); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java index f5e25ea9f..10330113c 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java @@ -100,7 +100,7 @@ public class KerasAtrousConvolution1DTest extends BaseDL4JTest { Convolution1DLayer layer = new KerasAtrousConvolution1D(layerConfig).getAtrousConvolution1D(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); 
assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java index f2eebb8f2..7f1d65b3b 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java @@ -114,7 +114,7 @@ public class KerasAtrousConvolution2DTest extends BaseDL4JTest { ConvolutionLayer layer = new KerasAtrousConvolution2D(layerConfig).getAtrousConvolution2D(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java index 994d3affe..b8629573f 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java @@ -122,7 +122,7 @@ public class KerasConvolution1DTest extends BaseDL4JTest { Convolution1DLayer layer = new KerasConvolution1D(layerConfig).getConvolution1DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java index b92ab0432..4ba12c10f 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java @@ -123,7 +123,7 @@ public class KerasConvolution2DTest extends BaseDL4JTest { ConvolutionLayer layer = new KerasConvolution2D(layerConfig).getConvolution2DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); 
assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java index c36b0351d..f52939947 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java @@ -119,7 +119,7 @@ public class KerasConvolution3DTest extends BaseDL4JTest { ConvolutionLayer layer = new KerasConvolution3D(layerConfig).getConvolution3DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDeconvolution2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDeconvolution2DTest.java index c0db1c47b..9fecab86c 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDeconvolution2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDeconvolution2DTest.java @@ -123,7 +123,7 @@ public class KerasDeconvolution2DTest extends BaseDL4JTest { Deconvolution2D layer = new KerasDeconvolution2D(layerConfig).getDeconvolution2DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java index 4dc4856c0..eef103f98 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java @@ -128,7 +128,7 @@ public class KerasDepthwiseConvolution2DTest extends BaseDL4JTest { DepthwiseConvolution2D layer = kerasLayer.getDepthwiseConvolution2DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(DEPTH_MULTIPLIER, layer.getDepthMultiplier()); 
assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasSeparableConvolution2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasSeparableConvolution2DTest.java index 54f50a478..9745ff5ed 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasSeparableConvolution2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasSeparableConvolution2DTest.java @@ -130,7 +130,7 @@ public class KerasSeparableConvolution2DTest extends BaseDL4JTest { SeparableConvolution2D layer = new KerasSeparableConvolution2D(layerConfig).getSeparableConvolution2DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(DEPTH_MULTIPLIER, layer.getDepthMultiplier()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java index c9c70e5ff..637ce5915 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java @@ -89,7 +89,7 @@ public class KerasDenseTest extends BaseDL4JTest { DenseLayer layer = new KerasDense(layerConfig, false).getDenseLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java index 7ce6bf0b3..1bfc3a4ce 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java @@ -38,7 +38,6 @@ import org.deeplearning4j.nn.weights.WeightInitXavier; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -131,7 +130,7 @@ public class KerasLSTMTest extends BaseDL4JTest { } assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); 
assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnnTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnnTest.java index c8e8287fb..1b143a706 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnnTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnnTest.java @@ -101,7 +101,7 @@ public class KerasSimpleRnnTest extends BaseDL4JTest { (SimpleRnn) ((LastTimeStep) new KerasSimpleRnn(layerConfig).getSimpleRnnLayer()).getUnderlying(); assertEquals(ACTIVATION, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInitFn()); + assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java index 02ae2d45f..3c679267c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java @@ -28,6 +28,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; public interface INeuralNetworkConfiguration extends Serializable, Cloneable { INeuralNetworkConfiguration clone(); + void init(); /** @@ -35,28 +36,4 @@ public interface INeuralNetworkConfiguration extends Serializable, Cloneable { * @return */ IModel getNet(); -} -/** - /** - * Provides a flat list of all embedded layer configurations, this - * can only be called after the layer is initialized or {@link #getLayerConfigurations()} is - * called. 
- * - * @return unstacked layer configurations - - List getLayerConfigurations(); - - - /** - * This uncollables any stacked layer configurations within building blocks like - * @link BuildingBlockLayer} - - void calculateInnerLayerConfigurations(); - - /** - * An implementation should provide a method to validate the network - * @return true if no errors found; false otherwise - - boolean isValid(); -} -**/ \ No newline at end of file + } \ No newline at end of file diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java index e5e94ef3c..dac126dd7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java @@ -259,7 +259,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { */ private static void handleLegacyWeightInitFromJson(String json, LayerConfiguration layer, ObjectMapper mapper, JsonNode vertices) { if (layer instanceof BaseLayerConfiguration - && ((BaseLayerConfiguration) layer).getWeightInitFn() == null) { + && ((BaseLayerConfiguration) layer).getWeightInit() == null) { String layerName = layer.getLayerName(); try { @@ -291,7 +291,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { if (weightInit != null) { final IWeightInit wi = WeightInit.valueOf(weightInit.asText()).getWeightInitFunction(dist); - ((BaseLayerConfiguration) layer).setWeightInitFn(wi); + ((BaseLayerConfiguration) layer).setWeightInit(wi); } } catch (IOException e) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java index a11c21adc..4f9a9bb1f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java @@ -74,343 +74,274 @@ import org.nd4j.linalg.learning.regularization.WeightDecay; * and their hyperparameters. Hyperparameters are variables that determine how a neural network * learns. They include how many times to update the weights of the model, how to initialize those * weights, which activation function to attach to the nodes, which optimization algorithm to use, - * and how fast the model should learn. This is what one configuration would look like: - *

- * - * NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
- * .weightInit(WeightInit.XAVIER) .activation(Activation.RELU)
- * .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- * .updater(new Sgd(0.05)) //... other hyperparameters
- * .backprop(true)
- * .build();

- * - * With Deeplearning4j, you add a layer - * by calling layer on the NeuralNetConfiguration.NeuralNetConfigurationBuilder(), specifying its place in the order of + * and how fast the model should learn. This is what one configuration would look like:
+ *
+ * NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
+ * .weightInit(WeightInit.XAVIER) .activation(Activation.RELU)
+ * .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
+ * .updater(new Sgd(0.05)) //... other hyperparameters
+ * .backprop(true)
+ * .build();
+ *
+ * With Deeplearning4j, you add a layer by calling layer on the + * NeuralNetConfiguration.NeuralNetConfigurationBuilder(), specifying its place in the order of * layers (the zero-indexed layer below is the input layer), the number of input and output nodes, - * nIn and nOut, as well as the type: DenseLayer.

- * - * .layer(0, new DenseLayer.Builder().nIn(784).nOut(250)
- * .build())

- * - * Once you've configured your net, you train the - * model with model.fit. + * nIn and nOut, as well as the type: DenseLayer.
+ *
+ * .layer(0, new DenseLayer.Builder().nIn(784).nOut(250)
+ * .build())
+ *
+ * Once you've configured your net, you train the model with model.fit. */ - @Data @Slf4j @EqualsAndHashCode(exclude = {"iterationCount", "epochCount"}) @JsonIgnoreProperties(ignoreUnknown = true) -//The inner builder, that we can then extend ... -@SuperBuilder //TODO fix access +// The inner builder, that we can then extend ... +@SuperBuilder // TODO fix access public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetworkConfiguration { private static final int DEFAULT_TBPTT_LENGTH = 20; - /** * Set constraints to be applied to all layers. Default: no constraints.
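The class javadoc above sketches a builder-based configuration in fragments. Assembled, and under the assumption that the builder methods it names (builder(), weightInit(WeightInit), activation(Activation), optimizationAlgo(...), updater(...), layer(int, ...) and build()) exist with those signatures in this fork, a compilable version could look like the following sketch; the backprop(true) call from the javadoc is left out here.

import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.Sgd;

public class BuilderJavadocSketch {
  public static void main(String[] args) {
    // Global defaults; each value can be overridden on a per-layer basis.
    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
        .weightInit(WeightInit.XAVIER)   // enum overload; replaces the old weightInitFn(...) call
        .activation(Activation.RELU)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(new Sgd(0.05))
        // Zero-indexed layer position with explicit nIn/nOut, as in the javadoc.
        .layer(0, new DenseLayer.Builder().nIn(784).nOut(250).build())
        .build();

    System.out.println(conf.toJson());
  }
}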
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. + * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. * * @param constraints Constraints to apply to all weight parameters of all layers */ - @lombok.Builder.Default - protected final List<LayerConstraint> contrainWeights = new ArrayList<>(); - - - + @lombok.Builder.Default protected final List<LayerConstraint> contrainWeights = new ArrayList<>(); /** * Set constraints to be applied to all layers. Default: no constraints.
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. + * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. * * @param constraints Constraints to apply to all bias parameters of all layers */ - @lombok.Builder.Default - protected final List<LayerConstraint> biasConstraints = new ArrayList<>(); + @lombok.Builder.Default protected final List<LayerConstraint> biasConstraints = new ArrayList<>(); /** * Set constraints to be applied to all layers. Default: no constraints.
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. + * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. * * @param constraints Constraints to apply to all parameters of all layers */ @lombok.Builder.Default protected final List allParamContraints = new ArrayList<>(); /** - * This is a basic concept, a neural network is made of layers, but also can use - * another neural network as a building block. When the configuration is initialized, those - * building blocks will be flattened into a single list of layers. - * Internal ordered list of layers and inner neural networks. If the object is a NeuralNetConfiguration, - * each configuration must contain at least one layer. + * This is a basic concept, a neural network is made of layers, but also can use another neural + * network as a building block. When the configuration is initialized, those building blocks will + * be flattened into a single list of layers. Internal ordered list of layers and inner neural + * networks. If the object is a NeuralNetConfiguration, each configuration must contain at least + * one layer. */ @Getter @lombok.Builder.Default protected final List innerConfigurations = new ArrayList<>(); - @Getter - @Setter - @NonNull - @lombok.Builder.Default - @Deprecated + + @Getter @Setter @NonNull @lombok.Builder.Default @Deprecated protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.ENABLED; - @Getter - @Setter - @NonNull - @lombok.Builder.Default - @Deprecated + + @Getter @Setter @NonNull @lombok.Builder.Default @Deprecated protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.ENABLED; /** * The type of backprop. Default setting is used for most networks (MLP, CNN etc), but optionally * truncated BPTT can be used for training recurrent neural networks. If using TruncatedBPTT make * sure you set both tBPTTForwardLength() and tBPTTBackwardLength() */ - @Getter - @Setter - @NonNull - @lombok.Builder.Default + @Getter @Setter @NonNull @lombok.Builder.Default protected BackpropType backpropType = BackpropType.Standard; - @Getter - @lombok.Builder.Default + + @Getter @lombok.Builder.Default protected Map inputPreProcessors = new HashMap<>(); /** * When doing truncated BPTT: how many steps of forward pass should we do before doing (truncated) - * backprop?
Only applicable when doing backpropType(BackpropType.TruncatedBPTT)
Typically - * tBPTTForwardLength parameter is same as the tBPTTBackwardLength parameter, but may be larger - * than it in some circumstances (but never smaller)
Ideally your training data time series - * length should be divisible by this This is the k1 parameter on pg23 of + * Only applicable when doing backpropType(BackpropType.TruncatedBPTT)
+ * Typically tBPTTForwardLength parameter is same as the tBPTTBackwardLength parameter, but may be + * larger than it in some circumstances (but never smaller)
+ * Ideally your training data time series length should be divisible by this. This is the k1 + * parameter on pg23 of
http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf * * @param forwardLength Forward length > 0, >= backwardLength */ - @Getter - @Setter - @lombok.Builder.Default - protected int tbpttFwdLength = 20; + @Getter @Setter @lombok.Builder.Default protected int tbpttFwdLength = 20; /** - * When doing truncated BPTT: how many steps of backward should we do?
Only applicable when - * doing backpropType(BackpropType.TruncatedBPTT)
This is the k2 parameter on pg23 of + * Only applicable when doing backpropType(BackpropType.TruncatedBPTT)
+ * This is the k2 parameter on pg23 of
http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf * * @param backwardLength <= forwardLength */ - @Getter - @Setter - @lombok.Builder.Default - protected int tbpttBackLength = 20; - //Counter for the number of parameter updates so far - // This is important for learning rate schedules, for example, and is stored here to ensure it is persisted + @Getter @Setter @lombok.Builder.Default protected int tbpttBackLength = 20; + // Counter for the number of parameter updates so far + // This is important for learning rate schedules, for example, and is stored here to ensure it is + // persisted // for Spark and model serialization - @Getter - @Setter - @lombok.Builder.Default - protected int iterationCount = 0; - //Counter for the number of epochs completed so far. Used for per-epoch schedules - @Getter - @Setter - @lombok.Builder.Default - protected int epochCount = 0; - @lombok.Builder.Default - protected double dampingFactor = 100; - //gradient keys used for ensuring order when getting and setting the gradient - //@lombok.Builder.Default - //protected List variables = new ArrayList<>(); - @Getter - @Setter - @lombok.Builder.Default - private boolean miniBatch = false; - /** - * A seed for this network, will be random if not specified. - */ - @Getter - @Setter - @lombok.Builder.Default - private long seed = new Random().nextLong(); + @Getter @Setter @lombok.Builder.Default protected int iterationCount = 0; + // Counter for the number of epochs completed so far. Used for per-epoch schedules + @Getter @Setter @lombok.Builder.Default protected int epochCount = 0; + @lombok.Builder.Default protected double dampingFactor = 100; + // gradient keys used for ensuring order when getting and setting the gradient + // @lombok.Builder.Default + // protected List variables = new ArrayList<>(); + @Getter @Setter @lombok.Builder.Default private boolean miniBatch = false; + /** A seed for this network, will be random if not specified. */ + @Getter @Setter @lombok.Builder.Default private long seed = new Random().nextLong(); /** * The default {@link CacheMode} for this configuration. Will be set to "NONE" if not specified * otherwise. This method defines how/if preOutput cache is handled: NONE: cache disabled (default * value) HOST: Host memory will be used DEVICE: GPU memory will be used (on CPU backends effect * will be the same as for HOST) - *

- * Valid values are
CacheMode.NONE,
CacheMode.HOST or
CacheMode.DEVICE
+ * + *

Valid values are
+ * CacheMode.NONE,
+ * CacheMode.HOST or
+ * CacheMode.DEVICE
* * @param cacheMode */ - @NonNull - @Getter - @Setter - @lombok.Builder.Default - private CacheMode cacheMode = CacheMode.NONE; + @NonNull @Getter @Setter @lombok.Builder.Default private CacheMode cacheMode = CacheMode.NONE; /** * The name for this configuration. Defaults to "Anonymous INeuralNetworkConfiguration" if it is * not specified. */ - @lombok.Builder.Default - @Getter - private String name = "Anonymous INeuralNetworkConfiguration"; - /** - * The {@link InputType} of the data for this network configuration - */ - @Getter - @Setter - private InputType inputType; + @lombok.Builder.Default @Getter private String name = "Anonymous INeuralNetworkConfiguration"; + /** The {@link InputType} of the data for this network configuration */ + @Getter @Setter private InputType inputType; /** * Set the DataType for the network parameters and activations for all layers in the network. * Default: Float * * @param dataType Datatype to use for parameters and activations */ - @Getter - @Setter - @lombok.Builder.Default - @NonNull - private DataType dataType = DataType.FLOAT; + @Getter @Setter @lombok.Builder.Default @NonNull private DataType dataType = DataType.FLOAT; /** * Whether to override the nIn configuration forcibly upon construction. Default value is true. * * @return builder pattern */ - @Getter - @Setter - @lombok.Builder.Default - private boolean overrideNinUponBuild = true; + @Getter @Setter @lombok.Builder.Default private boolean overrideNinUponBuild = true; /** * Enabled by default. If enabled, the output layer configuration will be validated, to throw an - * exception on likely invalid outputs - such as softmax + nOut=1, or LossMCXENT + Tanh.
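The configuration-level fields above (seed, cacheMode, name, inputType, dataType, overrideNinUponBuild) are plain @SuperBuilder properties, so each one gets a generated builder setter and falls back to its @Builder.Default when not set. A minimal sketch, assuming those generated setters and that a configuration may be built without layers purely for illustration:

import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.nd4j.linalg.api.buffer.DataType;

public class GlobalDefaultsSketch {
  public static void main(String[] args) {
    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
        .seed(42L)                   // otherwise a random seed is drawn
        .cacheMode(CacheMode.HOST)   // preOutput cache kept in host memory; default is NONE
        .dataType(DataType.FLOAT)    // parameters and activations; FLOAT is already the default
        .build();

    System.out.println(conf.toJson());
  }
}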
If - * disabled (false) no output layer validation will be performed.
Disabling this validation is - * not recommended, as the configurations that fail validation usually will not be able to learn - * correctly. However, the option to disable this validation is provided for advanced users when - * creating non-standard architectures. + * exception on likely invalid outputs - such as softmax + nOut=1, or LossMCXENT + Tanh.
+ * If disabled (false) no output layer validation will be performed.
+ * Disabling this validation is not recommended, as the configurations that fail validation + * usually will not be able to learn correctly. However, the option to disable this validation is + * provided for advanced users when creating non-standard architectures. * * @param validate If true: validate output layer configuration. False: don't validate */ - @Getter - @Setter - @lombok.Builder.Default - private boolean validateOutputLayerConfig = true; + @Getter @Setter @lombok.Builder.Default private boolean validateOutputLayerConfig = true; /** * Enabled by default. If enabled, an exception will be throw when using the (invalid) combination * of truncated backpropagation through time (TBPTT) with either a GlobalPoolingLayer or - * LastTimeStepLayer.
It is possible to disable this validation to allow what is almost - * certainly an invalid configuration to be used, however this is not recommended. + * LastTimeStepLayer.
+ * It is possible to disable this validation to allow what is almost certainly an invalid + * configuration to be used, however this is not recommended. * * @param validate Whether TBPTT validation should be performed */ - @Getter - @Setter - @lombok.Builder.Default - private boolean validateTbpttConfig = true; + @Getter @Setter @lombok.Builder.Default private boolean validateTbpttConfig = true; /** * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam} or - * {@link org.nd4j.linalg.learning.config.Nesterovs}
Note: values set by this method will be - * applied to all applicable layers in the network, unless a different value is explicitly set on - * a given layer. In other words: values set via this method are used as the default value, and - * can be overridden on a per-layer basis. + * {@link org.nd4j.linalg.learning.config.Nesterovs}
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. * * @param updater Updater to use */ - @Getter - @Setter - private IUpdater updater; + @Getter @Setter private IUpdater updater; /** * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping - * etc. See {@link GradientNormalization} for details
Note: values set by this method will be - * applied to all applicable layers in the network, unless a different value is explicitly set on - * a given layer. In other words: values set via this method are used as the default value, and - * can be overridden on a per-layer basis. + * etc. See {@link GradientNormalization} for details
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. * * @param gradientNormalization Type of normalization to use. Defaults to None. * @see GradientNormalization */ - @Getter - @Setter - @NonNull - @lombok.Builder.Default + @Getter @Setter @NonNull @lombok.Builder.Default private GradientNormalization gradientNormalization = GradientNormalization.None; /** * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, * GradientNormalization.ClipL2PerParamType, and - * GradientNormalization.ClipElementWiseAbsoluteValue
Not used otherwise.
L2 threshold for - * first two types of clipping, or absolute value threshold for last type of clipping.
Note: - * values set by this method will be applied to all applicable layers in the network, unless a - * different value is explicitly set on a given layer. In other words: values set via this method - * are used as the default value, and can be overridden on a per-layer basis. + * GradientNormalization.ClipElementWiseAbsoluteValue
+ * Not used otherwise.
+ * L2 threshold for first two types of clipping, or absolute value threshold for last type of + * clipping.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. */ - @Getter - @Setter - private double gradientNormalizationThreshold; + @Getter @Setter private double gradientNormalizationThreshold; /** - * Activation function / neuron non-linearity
Note: values set by this method will be applied - * to all applicable layers in the network, unless a different value is explicitly set on a given - * layer. In other words: values set via this method are used as the default value, and can be - * overridden on a per-layer basis. + * Activation function / neuron non-linearity
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. */ - @Getter - @Setter - private IActivation activation; - //whether to constrain the gradient to unit norm or not - @Getter - @Setter - private StepFunction stepFunction; - @Getter - @Setter - @lombok.Builder.Default - private OptimizationAlgorithm optimizationAlgo = OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT; - @Getter - @Setter - @lombok.Builder.Default - private int maxNumLineSearchIterations = 5; + @Getter @Setter private IActivation activation; + // whether to constrain the gradient to unit norm or not + @Getter @Setter private StepFunction stepFunction; + + @Getter @Setter @lombok.Builder.Default + private OptimizationAlgorithm optimizationAlgo = + OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT; + + @Getter @Setter @lombok.Builder.Default private int maxNumLineSearchIterations = 5; /** - * Set the regularization for the parameters (excluding biases) - for example {@link WeightDecay}
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
+ * Set the regularization for the parameters (excluding biases) - for example {@link WeightDecay} + *
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis.
* - * @param regularization Regularization to apply for the network parameters/weights (excluding biases) + * @param regularization Regularization to apply for the network parameters/weights (excluding + * biases) */ - @Getter - @lombok.Builder.Default - private List<Regularization> regularization = new ArrayList<>(); + @Getter @lombok.Builder.Default private List<Regularization> regularization = new ArrayList<>(); /** * Set the regularization for the biases only - for example {@link WeightDecay}
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis.
* * @param regularizationBias Regularization to apply for the network biases only */ - @Getter - @lombok.Builder.Default + @Getter @lombok.Builder.Default private List<Regularization> regularizationBias = new ArrayList<>(); - @Getter - @Setter - @lombok.Builder.Default - private IUpdater iUpdater = new Sgd(); + + @Getter @Setter @lombok.Builder.Default private IUpdater iUpdater = new Sgd(); /** * Gradient updater configuration, for the biases only. If not set, biases will use the updater as * set by {@link #setIUpdater(IUpdater)}
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. + * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. * * @param updater Updater to use for bias parameters */ - @Getter - @Setter - @lombok.Builder.Default - private IUpdater biasUpdater = null; - @Getter - @Setter - @lombok.Builder.Default + @Getter @Setter @lombok.Builder.Default private IUpdater biasUpdater = null; + + @Getter @Setter @lombok.Builder.Default private IActivation activationFn = new ActivationSigmoid(); /** * Weight initialization scheme to use, for initial weight values Note: values set by this method @@ -418,96 +349,83 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * set on a given layer. In other words: values set via this method are used as the default value, * and can be overridden on a per-layer basis. */ - @Getter - @Setter - @lombok.Builder.Default - private IWeightInit weightInitFn = new WeightInitXavier(); + @Getter @Setter @lombok.Builder.Default private IWeightInit weightInit = new WeightInitXavier(); /** - * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. - * See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. + * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. See + * {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. + * * @param convolutionMode Convolution mode to use */ - @Getter - @Setter - @lombok.Builder.Default + @Getter @Setter @lombok.Builder.Default private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; /** - * Sets the cuDNN algo mode for convolutional layers, which impacts performance and memory usage of cuDNN. - * See {@link ConvolutionLayer.AlgoMode} for details. Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. - *
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. + * Sets the cuDNN algo mode for convolutional layers, which impacts performance and memory usage + * of cuDNN. See {@link ConvolutionLayer.AlgoMode} for details. Defaults to "PREFER_FASTEST", but + * "NO_WORKSPACE" uses less memory.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. + * * @param cudnnAlgoMode cuDNN algo mode to use */ - @Getter - @Setter - @lombok.Builder.Default + @Getter @Setter @lombok.Builder.Default private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST; - @Getter - @Setter - @lombok.Builder.Default - private boolean minimize = true; + + @Getter @Setter @lombok.Builder.Default private boolean minimize = true; /** * Set the dropout for all layers in this network
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * * Dropout probability. This is the probability of retaining each input activation value for a layer. - * * dropOut(x) will keep an input activation with probability x, and set to 0 with probability 1-x.
- * * dropOut(0.0) is a special value / special case - when set to 0.0., dropout is disabled (not applied). Note - * * that a dropout value of 1.0 is functionally equivalent to no dropout: i.e., 100% probability of retaining - * * each input activation.
- * *

- * * Note 1: Dropout is applied at training time only - and is automatically not applied at test time - * * (for evaluation, etc)
- * * Note 2: This sets the probability per-layer. Care should be taken when setting lower values for - * * complex networks (too much information may be lost with aggressive (very low) dropout values).
- * * Note 3: Frequently, dropout is not applied to (or, has higher retain probability for) input (first layer) - * * layers. Dropout is also often not applied to output layers. This needs to be handled MANUALLY by the user - * * - set .dropout(0) on those layers when using global dropout setting.
- * * Note 4: Implementation detail (most users can ignore): DL4J uses inverted dropout, as described here: - * * http://cs231n.github.io/neural-networks-2/ - * *

- * *
- * * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * * value, and can be overridden on a per-layer basis. - * * - * * @param inputRetainProbability Dropout probability (probability of retaining each input activation value for a layer) - * * @see #dropOut(IDropout) + * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. * Dropout + * probability. This is the probability of retaining each input activation value for a + * layer. * dropOut(x) will keep an input activation with probability x, and set to 0 with + * probability 1-x.
+ * * dropOut(0.0) is a special value / special case - when set to 0.0., dropout is disabled (not + * applied). Note * that a dropout value of 1.0 is functionally equivalent to no dropout: i.e., + * 100% probability of retaining * each input activation.
+ * * * + *

* Note 1: Dropout is applied at training time only - and is automatically not applied at + * test time * (for evaluation, etc)
+ * * Note 2: This sets the probability per-layer. Care should be taken when setting lower values + * for * complex networks (too much information may be lost with aggressive (very low) dropout + * values).
+ * * Note 3: Frequently, dropout is not applied to (or, has higher retain probability for) input + * (first layer) * layers. Dropout is also often not applied to output layers. This needs to be + * handled MANUALLY by the user * - set .dropout(0) on those layers when using global dropout + * setting.
+ * * Note 4: Implementation detail (most users can ignore): DL4J uses inverted dropout, as + * described here: * http://cs231n.github.io/neural-networks-2/ + * * *
+ * * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different * value is explicitly set on a given layer. In other words: values set via + * this method are used as the default * value, and can be overridden on a per-layer basis. * + * * @param inputRetainProbability Dropout probability (probability of retaining each input + * activation value for a layer) * @see #dropOut(IDropout) * - * @param dropout Dropout, such as {@link Dropout}, {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout}, - * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc + * @param dropout Dropout, such as {@link Dropout}, {@link + * org.deeplearning4j.nn.conf.dropout.GaussianDropout}, {@link + * org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc * @return */ - @Getter - @Setter - private IDropout idropOut; + @Getter @Setter private IDropout idropOut; /** * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and * {@link org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) for the layers in this network.
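The dropout notes above describe a retain probability applied as a network-wide default. As a sketch of how the two entry points relate, assuming the builder behaves as the dropOut(double) method shown later in this patch (it wraps the value in a Dropout and stores it as idropOut), and again assuming a configuration may be built without layers for illustration:

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.dropout.Dropout;

public class DropoutDefaultSketch {
  public static void main(String[] args) {
    // Retain each input activation with probability 0.8 (inverted dropout, training time only).
    NeuralNetConfiguration viaDouble = NeuralNetConfiguration.builder()
        .dropOut(0.8)
        .build();

    // The same default expressed with an explicit IDropout instance.
    NeuralNetConfiguration viaInstance = NeuralNetConfiguration.builder()
        .idropOut(new Dropout(0.8))
        .build();

    System.out.println(viaDouble.toJson());
    System.out.println(viaInstance.toJson());
  }
}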
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. + * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. * * @param weightNoise Weight noise instance to use */ - @Getter - @Setter - private IWeightNoise weightNoise; - @Getter - @Setter - @lombok.Builder.Default - private double biasInit = 0.0; - @Getter - @Setter - @lombok.Builder.Default - private double gainInit = 1.0; + @Getter @Setter private IWeightNoise weightNoise; + + @Getter @Setter @lombok.Builder.Default private double biasInit = 0.0; + @Getter @Setter @lombok.Builder.Default private double gainInit = 1.0; /** * Handle {@link WeightInit} and {@link Distribution} from legacy configs in Json format. Copied @@ -515,10 +433,10 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * * @return True if all is well and layer iteration shall continue. False else-wise. */ - private static boolean handleLegacyWeightInitFromJson(String json, LayerConfiguration l, - ObjectMapper mapper, - JsonNode confs, int layerCount) { - if ((l instanceof BaseLayerConfiguration) && ((BaseLayerConfiguration) l).getWeightInitFn() == null) { + private static boolean handleLegacyWeightInitFromJson( + String json, LayerConfiguration l, ObjectMapper mapper, JsonNode confs, int layerCount) { + if ((l instanceof BaseLayerConfiguration) + && ((BaseLayerConfiguration) l).getWeightInit() == null) { try { JsonNode jsonNode = mapper.readTree(json); if (confs == null) { @@ -528,7 +446,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor ArrayNode layerConfs = (ArrayNode) confs; JsonNode outputLayerNNCNode = layerConfs.get(layerCount); if (outputLayerNNCNode == null) { - return false; //Should never happen... + return false; // Should never happen... 
} JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); @@ -537,8 +455,8 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor } JsonNode layerNode = layerWrapperNode.elements().next(); - JsonNode weightInit = layerNode.get( - "weightInit"); //Should only have 1 element: "dense", "output", etc + JsonNode weightInit = + layerNode.get("weightInit"); // Should only have 1 element: "dense", "output", etc JsonNode distribution = layerNode.get("dist"); Distribution dist = null; @@ -547,9 +465,9 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor } if (weightInit != null) { - final IWeightInit wi = WeightInit.valueOf(weightInit.asText()) - .getWeightInitFunction(dist); - ((BaseLayerConfiguration) l).setWeightInitFn(wi); + final IWeightInit wi = + WeightInit.valueOf(weightInit.asText()).getWeightInitFunction(dist); + ((BaseLayerConfiguration) l).setWeightInit(wi); } } @@ -560,7 +478,6 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor } } return true; - } /** @@ -582,10 +499,9 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor } public static NeuralNetBaseBuilderConfiguration fromYaml(String input) { - throw new RuntimeException("Needs fixing - not supported."); //TODO + throw new RuntimeException("Needs fixing - not supported."); // TODO } - /** * @return JSON representation of NN configuration */ @@ -606,8 +522,10 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor public String toJson() { ObjectMapper mapper = NeuralNetBaseBuilderConfiguration.mapper(); synchronized (mapper) { - //JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields occasionally - //when writeValueAsString is used by multiple threads. This results in invalid JSON. See issue #3243 + // JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields + // occasionally + // when writeValueAsString is used by multiple threads. This results in invalid JSON. 
See + // issue #3243 try { return mapper.writeValueAsString(this); } catch (com.fasterxml.jackson.core.JsonProcessingException e) { @@ -616,18 +534,52 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor } } - public abstract static class NeuralNetBaseBuilderConfigurationBuilder - > { + @Override + public NeuralNetBaseBuilderConfiguration clone() { + NeuralNetBaseBuilderConfiguration clone; + try { + clone = (NeuralNetBaseBuilderConfiguration) super.clone(); + } catch (CloneNotSupportedException ex) { + throw new RuntimeException(ex); + } + if (clone.stepFunction != null) { + clone.stepFunction = clone.stepFunction.clone(); + } + /** if (clone.variables != null) { clone.variables = new ArrayList<>(clone.variables); } */ + clone.getInnerConfigurations().addAll(innerConfigurations); - List innerConfigurations$value = new ArrayList<>(); //initialize with an empty list + if (clone.getInputPreProcessors() != null) { + Map map = new HashMap<>(); + for (Map.Entry entry : clone.getInputPreProcessors().entrySet()) { + map.put(entry.getKey(), entry.getValue().clone()); + } + clone.getInputPreProcessors().clear(); + clone.getInputPreProcessors().putAll(map); + } + + clone.setInferenceWorkspaceMode(this.inferenceWorkspaceMode); + clone.setTrainingWorkspaceMode(this.trainingWorkspaceMode); + clone.setCacheMode(this.cacheMode); + clone.setValidateOutputLayerConfig(this.validateOutputLayerConfig); + clone.setDataType(this.dataType); + + return clone; + } + + public abstract static class NeuralNetBaseBuilderConfigurationBuilder< + C extends NeuralNetBaseBuilderConfiguration, + B extends NeuralNetBaseBuilderConfiguration.NeuralNetBaseBuilderConfigurationBuilder> { + + List innerConfigurations$value = new ArrayList<>(); // initialize with an empty list /** * Set constraints to be applied to all layers. Default: no constraints.
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. + * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis. * * @param constraints Constraints to apply to all weight parameters of all layers */ @@ -638,32 +590,35 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor } /** - * For the (perhaps partially constructed) network configuration, return a list of activation sizes for each - * layer in the network.
- * Note: To use this method, the network input type must have been set using {@link #setInputType(InputType)} first + * For the (perhaps partially constructed) network configuration, return a list of activation + * sizes for each layer in the network.
+ * Note: To use this method, the network input type must have been set using {@link + * #setInputType(InputType)} first + * + * @return A list of activation types for the network, indexed by layer number */ - public List<InputType> getLayerActivationTypes(){ - Preconditions.checkState(inputType != null, "Can only calculate activation types if input type has" + - "been set. Use setInputType(InputType)"); - - - throw new RuntimeException("Error calculating layer activation types: error instantiating MultiLayerConfiguration"); + public List<InputType> getLayerActivationTypes() { + Preconditions.checkState( + inputType != null, + "Can only calculate activation types if input type has " + + "been set. Use setInputType(InputType)"); + throw new RuntimeException( + "Error calculating layer activation types: error instantiating MultiLayerConfiguration"); } - /** * Set constraints to be applied to all layers. Default: no constraints.
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. + * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis. * * @param constraints Constraints to apply to all parameters of all layers */ - public B constrainAllParameters(LayerConstraint... constraints){ + public B constrainAllParameters(LayerConstraint... constraints) { allParamContraints$value = Arrays.asList(constraints); allParamContraints$set = true; return (B) this; @@ -671,11 +626,12 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor /** * Set constraints to be applied to all layers. Default: no constraints.
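The constrainAllParameters(...) method above installs network-wide default constraints that are applied after every parameter update, unless a layer declares its own. A short sketch, assuming the standard DL4J constraint implementations are available under their usual package in this fork:

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.constraint.MaxNormConstraint;

public class ConstraintDefaultSketch {
  public static void main(String[] args) {
    // Clamp the L2 norm of every parameter to at most 2.0 along dimension 1,
    // applied after each parameter update for all applicable layers.
    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
        .constrainAllParameters(new MaxNormConstraint(2.0, 1))
        .build();

    System.out.println(conf.toJson());
  }
}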
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. + * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis. * * @param constraints Constraints to apply to all bias parameters of all layers */ @@ -692,14 +648,12 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param processor what to use to preProcess the data. * @return builder pattern */ - public B inputPreProcessor(Integer layer, - InputPreProcessor processor) { + public B inputPreProcessor(Integer layer, InputPreProcessor processor) { inputPreProcessors$value.put(layer, processor); inputPreProcessors$set = true; return (B) this; } - /** * Set layer at index * @@ -725,14 +679,12 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor return (B) this; } - //TODO this is a dirty workaround + // TODO this is a dirty workaround public boolean isOverrideNinUponBuild() { return isOverrideNinUponBuild(); } - /** - * Specify additional layer configurations - */ + /** Specify additional layer configurations */ @Deprecated public B layersFromArray(@NonNull LayerConfiguration[] arrLayers) { innerConfigurations$value.addAll(List.of(arrLayers)); @@ -740,9 +692,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor return (B) this; } - /** - * Specify additional layer configurations - */ + /** Specify additional layer configurations */ @Deprecated public B layersFromList(@NonNull List listLayers) { innerConfigurations$value.addAll(listLayers); @@ -750,15 +700,14 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor return (B) this; } - /** - * L1 regularization coefficient for the weights (excluding biases).
Note: values set by - * this method will be applied to all applicable layers in the network, unless a different value - * is explicitly set on a given layer. In other words: values set via this method are used as - * the default value, and can be overridden on a per-layer basis. + * L1 regularization coefficient for the weights (excluding biases).
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis. */ public B l1(double l1) { - //Check if existing L1 exists; if so, replace it + // Check if existing L1 exists; if so, replace it NetworkUtils.removeInstances(regularization$value, L1Regularization.class); if (l1 > 0.0) { regularization$value.add(new L1Regularization(l1)); @@ -770,21 +719,23 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor /** * L2 regularization coefficient for the weights (excluding biases).
* Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double)} should be - * preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.
Note: values set - * by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used - * as the default value, and can be overridden on a per-layer basis.
Note: L2 regularization - * and weight decay usually should not be used together; if any weight decay (or L2) has been - * added for the biases, these will be removed first. + * preferred to L2 regularization. See {@link WeightDecay} javadoc for further details.
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis.
+ * Note: L2 regularization and weight decay usually should not be used together; if any weight + * decay (or L2) has been added for the biases, these will be removed first. * * @see #weightDecay(double, boolean) */ public B l2(double l2) { - //Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make sense to use both + // Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make + // sense to use both NetworkUtils.removeInstances(regularization$value, L2Regularization.class); if (l2 > 0.0) { - NetworkUtils.removeInstancesWithWarning(regularization$value, WeightDecay.class, + NetworkUtils.removeInstancesWithWarning( + regularization$value, + WeightDecay.class, "WeightDecay regularization removed: incompatible with added L2 regularization"); regularization$value.add(new L2Regularization(l2)); } @@ -793,10 +744,10 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor } /** - * L1 regularization coefficient for the bias.
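The l2(double) method above removes any previously configured WeightDecay before adding the L2 term, and weightDecay(...) further down does the reverse, so the two defaults are mutually exclusive and the last call wins. A sketch of the intended usage, assuming the builder methods behave as shown in this patch:

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;

public class RegularizationDefaultSketch {
  public static void main(String[] args) {
    // Plain L2 on the weights (biases are configured separately via l2Bias / weightDecayBias).
    NeuralNetConfiguration l2Conf = NeuralNetConfiguration.builder()
        .l2(1e-4)
        .build();

    // Weight decay replaces a previously set L2 term, logging a warning about the conflict.
    NeuralNetConfiguration wdConf = NeuralNetConfiguration.builder()
        .l2(1e-4)
        .weightDecay(0.05)
        .build();

    System.out.println(l2Conf.toJson());
    System.out.println(wdConf.toJson());
  }
}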
Note: values set by this method will be - * applied to all applicable layers in the network, unless a different value is explicitly set - * on a given layer. In other words: values set via this method are used as the default value, - * and can be overridden on a per-layer basis. + * L1 regularization coefficient for the bias.
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis. */ public B l1Bias(double l1Bias) { NetworkUtils.removeInstances(regularizationBias$value, L1Regularization.class); @@ -809,21 +760,23 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor /** * L2 regularization coefficient for the bias.
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double, boolean)} - * should be preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.
Note: values set - * by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used - * as the default value, and can be overridden on a per-layer basis.
Note: L2 regularization - * and weight decay usually should not be used together; if any weight decay (or L2) has been - * added for the biases, these will be removed first. + * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double, + * boolean)} should be preferred to L2 regularization. See {@link WeightDecay} javadoc for + * further details.
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis.
+ * Note: L2 regularization and weight decay usually should not be used together; if any weight + * decay (or L2) has been added for the biases, these will be removed first. * * @see #weightDecayBias(double, boolean) */ public B l2Bias(double l2Bias) { NetworkUtils.removeInstances(regularizationBias$value, L2Regularization.class); if (l2Bias > 0.0) { - NetworkUtils.removeInstancesWithWarning(regularizationBias$value, WeightDecay.class, + NetworkUtils.removeInstancesWithWarning( + regularizationBias$value, + WeightDecay.class, "L2 bias regularization removed: incompatible with added WeightDecay regularization"); regularizationBias$value.add(new L2Regularization(l2Bias)); } @@ -831,12 +784,12 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor } /** - * Add weight decay regularization for the network parameters (excluding biases).
This - * applies weight decay with multiplying the learning rate - see {@link WeightDecay} for - * more details.
Note: values set by this method will be applied to all applicable layers in - * the network, unless a different value is explicitly set on a given layer. In other words: - * values set via this method are used as the default value, and can be overridden on a - * per-layer basis.
+ * Add weight decay regularization for the network parameters (excluding biases).
+ * This applies weight decay with multiplying the learning rate - see {@link WeightDecay} + * for more details.
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis.
* * @param coefficient Weight decay regularization coefficient * @see #weightDecay(double, boolean) @@ -846,22 +799,25 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor } /** - * Add weight decay regularization for the network parameters (excluding biases). See - * {@link WeightDecay} for more details.
Note: values set by this method will be applied to - * all applicable layers in the network, unless a different value is explicitly set on a given - * layer. In other words: values set via this method are used as the default value, and can be - * overridden on a per-layer basis.
+ * Add weight decay regularization for the network parameters (excluding biases). See {@link + * WeightDecay} for more details.
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis.
* * @param coefficient Weight decay regularization coefficient - * @param applyLR Whether the learning rate should be multiplied in when performing weight - * decay updates. See {@link WeightDecay} for more details. + * @param applyLR Whether the learning rate should be multiplied in when performing weight decay + * updates. See {@link WeightDecay} for more details. * @see #weightDecay(double, boolean) */ public B weightDecay(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both + // Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't + // make sense to use both NetworkUtils.removeInstances(regularization$value, WeightDecay.class); if (coefficient > 0.0) { - NetworkUtils.removeInstancesWithWarning(regularization$value, L2Regularization.class, + NetworkUtils.removeInstancesWithWarning( + regularization$value, + L2Regularization.class, "L2 regularization removed: incompatible with added WeightDecay regularization"); regularization$value.add(new WeightDecay(coefficient, applyLR)); } @@ -871,10 +827,10 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor /** * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. This - * applies weight decay with multiplying the learning rate.
Note: values set by this - * method will be applied to all applicable layers in the network, unless a different value is - * explicitly set on a given layer. In other words: values set via this method are used as the - * default value, and can be overridden on a per-layer basis.
+ * applies weight decay with multiplying the learning rate.
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis.
* * @param coefficient Weight decay regularization coefficient * @see #weightDecayBias(double, boolean) @@ -892,10 +848,13 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param coefficient Weight decay regularization coefficient */ public B weightDecayBias(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both + // Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't + // make sense to use both NetworkUtils.removeInstances(regularizationBias$value, WeightDecay.class); if (coefficient > 0) { - NetworkUtils.removeInstancesWithWarning(regularizationBias$value, L2Regularization.class, + NetworkUtils.removeInstancesWithWarning( + regularizationBias$value, + L2Regularization.class, "L2 bias regularization removed: incompatible with added WeightDecay regularization"); regularizationBias$value.add(new WeightDecay(coefficient, applyLR)); } @@ -904,25 +863,19 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor } /** - * Activation function / neuron non-linearity
Note: values set by this method will be - * applied to all applicable layers in the network, unless a different value is explicitly set - * on a given layer. In other words: values set via this method are used as the default value, - * and can be overridden on a per-layer basis. + * Activation function / neuron non-linearity
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis. */ @Deprecated public B activation(@NonNull Activation activation) { return (B) activationFn(activation.getActivationFunction()); } - - - @Deprecated - public B weightInit(@NonNull WeightInit wi) { - return (B) weightInitFn(wi.getWeightInitFunction()); - } - /** * legacy code, does nothing + * * @return */ @Deprecated @@ -930,7 +883,6 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor return (B) this; } - /** * Set weight initialization scheme to random sampling via the specified distribution. * Equivalent to: {@code .weightInit(new WeightInitDistribution(distribution))} Note: values set @@ -941,11 +893,26 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param distribution Distribution to use for weight initialization */ public B weightInit(@NonNull Distribution distribution) { - return (B) weightInitFn(new WeightInitDistribution(distribution)); + this.weightInit$value = new WeightInitDistribution(distribution); + this.weightInit$set = true; + return (B) this; + } + + public B weightInit(@NonNull WeightInit weightInit) { + this.weightInit$value = weightInit.getWeightInitFunction(); + this.weightInit$set = true; + return (B) this; + } + + public B weightInit(@NonNull IWeightInit iWeightInit) { + this.weightInit$value = iWeightInit; + this.weightInit$set = true; + return (B) this; } /** * Same as {@link #weightInit(Distribution)}. + * * @param distribution * @return */ @@ -959,61 +926,25 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor /** * Creates a new {@link Dropout} and sets the dropout in the builder for this configuration + * * @param dropout activationRetainProbability * @return builder */ - public B dropOut( double dropout) { - return (B) idropOut( new Dropout(dropout)); + public B dropOut(double dropout) { + return (B) idropOut(new Dropout(dropout)); } /** * Add multiple inner neural net configurations at once + * * @param confs list of configurations * @return builder */ @Deprecated public B confs(@NonNull List confs) { innerConfigurations$value.addAll(confs); - innerConfigurations$set=true; + innerConfigurations$set = true; return (B) this; } } - - @Override - public NeuralNetBaseBuilderConfiguration clone() { - NeuralNetBaseBuilderConfiguration clone; - try { - clone = (NeuralNetBaseBuilderConfiguration) super.clone(); - } catch(CloneNotSupportedException ex) { - throw new RuntimeException(ex); - } - if (clone.stepFunction != null) { - clone.stepFunction = clone.stepFunction.clone(); - } - /** - if (clone.variables != null) { - clone.variables = new ArrayList<>(clone.variables); - } - **/ - - clone.getInnerConfigurations().addAll(innerConfigurations); - - if (clone.getInputPreProcessors() != null) { - Map map = new HashMap<>(); - for (Map.Entry entry : clone.getInputPreProcessors().entrySet()) { - map.put(entry.getKey(), entry.getValue().clone()); - } - clone.getInputPreProcessors().clear(); - clone.getInputPreProcessors().putAll(map); - } - - clone.setInferenceWorkspaceMode(this.inferenceWorkspaceMode); - clone.setTrainingWorkspaceMode(this.trainingWorkspaceMode); - clone.setCacheMode(this.cacheMode); - clone.setValidateOutputLayerConfig(this.validateOutputLayerConfig); - 
clone.setDataType(this.dataType); - - return clone; - - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index ed5a406b4..fe946e022 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -35,15 +35,11 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NonNull; -import lombok.Setter; + +import lombok.*; import lombok.experimental.SuperBuilder; import lombok.extern.jackson.Jacksonized; import lombok.extern.slf4j.Slf4j; -import lombok.val; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -67,9 +63,9 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.conf.memory.NetworkMemoryReport; import org.deeplearning4j.nn.conf.serde.JsonMappers; +import org.deeplearning4j.nn.conf.stepfunctions.DefaultStepFunction; import org.deeplearning4j.nn.conf.stepfunctions.StepFunction; import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; -import org.deeplearning4j.nn.conf.weightnoise.WeightNoise; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.util.OutputLayerUtil; @@ -319,16 +315,14 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { private boolean validateTbpttConfig = true; /** * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam} or - * {@link org.nd4j.linalg.learning.config.Nesterovs}
Note: values set by this method will be - * applied to all applicable layers in the network, unless a different value is explicitly set on - * a given layer. In other words: values set via this method are used as the default value, and - * can be overridden on a per-layer basis. + * {@link org.nd4j.linalg.learning.config.Nesterovs}
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. * * @param updater Updater to use */ - @Getter - @Setter - private IUpdater updater; + @Getter @Setter @Builder.Default private IUpdater updater = new Sgd(); /** * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping * etc. See {@link GradientNormalization} for details
Note: values set by this method will be @@ -357,19 +351,9 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { @Setter private double gradientNormalizationThreshold; - /** - * Activation function / neuron non-linearity
Note: values set by this method will be applied - * to all applicable layers in the network, unless a different value is explicitly set on a given - * layer. In other words: values set via this method are used as the default value, and can be - * overridden on a per-layer basis. - */ - @Getter - @Setter - private IActivation activation; - //whether to constrain the gradient to unit norm or not - @Getter - @Setter - private StepFunction stepFunction; + // whether to constrain the gradient to unit norm or not + @Getter @Setter @Builder.Default private StepFunction stepFunction = new DefaultStepFunction(); + @Getter @Setter @lombok.Builder.Default @@ -400,13 +384,10 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { @Getter @lombok.Builder.Default private List regularizationBias = new ArrayList<>(); - @Getter - @Setter - @lombok.Builder.Default - private IUpdater iUpdater = new Sgd(); + /** * Gradient updater configuration, for the biases only. If not set, biases will use the updater as - * set by {@link #setIUpdater(IUpdater)}
+ * set by {@link #setUpdater(IUpdater)}
* Note: values set by this method will be applied to all applicable layers in the network, unless a different * value is explicitly set on a given layer. In other words: values set via this method are used as the default * value, and can be overridden on a per-layer basis. @@ -420,7 +401,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { @Getter @Setter @lombok.Builder.Default - private IActivation activationFn = new ActivationSigmoid(); + private IActivation activation = new ActivationSigmoid(); /** * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. @@ -698,7 +679,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { private static boolean handleLegacyWeightInitFromJson(String json, LayerConfiguration l, ObjectMapper mapper, JsonNode confs, int layerCount) { - if ((l instanceof BaseLayerConfiguration) && ((BaseLayerConfiguration) l).getWeightInitFn() == null) { + if ((l instanceof BaseLayerConfiguration) && ((BaseLayerConfiguration) l).getWeightInit() == null) { try { JsonNode jsonNode = mapper.readTree(json); if (confs == null) { @@ -729,7 +710,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { if (weightInit != null) { final IWeightInit wi = WeightInit.valueOf(weightInit.asText()) .getWeightInitFunction(dist); - ((BaseLayerConfiguration) l).setWeightInitFn(wi); + ((BaseLayerConfiguration) l).setWeightInit(wi); } } @@ -851,8 +832,8 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { * that do not have an individual setting (nor a default) */ for(LayerConfiguration lconf : this.getFlattenedLayerConfigurations()) { - if(lconf.getActivationFn() == null ) lconf.setActivationFn(this.getActivationFn()); - if(lconf.getIUpdater() == null ) lconf.setIUpdater( this.getIUpdater() ); + if(lconf.getActivationFn() == null ) lconf.setActivationFn(this.getActivation()); + if(lconf.getIUpdater() == null ) lconf.setIUpdater( this.getUpdater() ); if(lconf.getIDropout() == null ) lconf.setIDropout( this.getIdropOut() ); if(lconf.getWeightNoise() == null ) lconf.setWeightNoise( this.getWeightNoise()); @@ -1108,29 +1089,27 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { */ public List getFlattenedLayerConfigurations(NeuralNetConfiguration conf) { List ret = new ArrayList<>(); //create the final return list - for( Object obj : conf.getInnerConfigurations().stream().skip(1) //don't include self - .collect(Collectors.toList())) { - //if Layer Config, include in list and inherit parameters from this conf - //else if neural net configuration, call self recursively to resolve layer configurations - if (obj instanceof LayerConfiguration) - ret.add((LayerConfiguration) obj); - else if (obj instanceof NeuralNetConfiguration) - ret.addAll(getFlattenedLayerConfigurations( - (NeuralNetConfiguration) obj)); - else { - log.error( - "The list of layers and neural network configurations does contain an object of {}. 
Element will be ignored.", - obj.getClass().getSimpleName()); - } - } - /** - LayerConfiguration lc = ((LayerConfiguration) lc).getType().getClazz().cast(obj); - switch(lc.getType()) { - case FC: { //fully connected layer - ((FeedForwardLayer) lc).setWeightInitFn(this.getWeightInitFn()); - } - if(lc instanceof FeedForwardLayer && ((FeedForwardLayer) lc).getWeightInitFn() == null) { - **/ + //When properly initialized, _this_ configuration is set first in the list, however we + //can find cases where this is not true, thus the first configuration is another net or layer configuration + //and should not be skipped. In essence, skip first configuration if that is "this". + int iSkip = 0; + if(conf.getInnerConfigurations().size()>0 && conf.getInnerConfigurations().get(0).equals(this)) { iSkip=1;} + conf.getInnerConfigurations().stream().skip(iSkip) + .forEach(obj -> { + //if Layer Config, include in list and inherit parameters from this conf + //else if neural net configuration, call self recursively to resolve layer configurations + if (obj instanceof LayerConfiguration) { + ((LayerConfiguration) obj).setNetConfiguration(conf); + ret.add((LayerConfiguration) obj); + } else if (obj instanceof NeuralNetConfiguration) + ret.addAll(getFlattenedLayerConfigurations( + (NeuralNetConfiguration) obj)); + else { + log.error( + "The list of layers and neural network configurations does contain an object of {}. Element will be ignored.", + obj.getClass().getSimpleName()); + } + }); return ret; } @@ -1143,17 +1122,6 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { return getFlattenedLayerConfigurations(this); } - - /** - * Get the configuration of the first layer - * @return layer configuration - */ - /** - public LayerConfiguration getFirstLayer() { - return getFlattenedLayerConfigurations().get(0); - } -**/ - /** * Add a new layer to the first position * @param layer configuration diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java index 121f9b38f..5f99c8082 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.conf.GradientNormalization; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; @@ -30,6 +31,7 @@ import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.weights.WeightInitDistribution; import org.deeplearning4j.util.NetworkUtils; +import org.jetbrains.annotations.NotNull; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; @@ -52,7 +54,7 @@ import java.util.List; public abstract class BaseLayerConfiguration extends LayerConfiguration implements ITraininableLayerConfiguration, Serializable, Cloneable { @NonNull - protected IWeightInit weightInitFn; + protected IWeightInit weightInit; protected double biasInit = 0.0; protected double gainInit = 0.0; 
protected List regularization; @@ -68,7 +70,7 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration implemen public BaseLayerConfiguration(Builder builder) { super(builder); this.layerName = builder.layerName; - this.weightInitFn = builder.weightInitFn; + this.weightInit = builder.weightInit; this.biasInit = builder.biasInit; this.gainInit = builder.gainInit; this.regularization = builder.regularization; @@ -89,7 +91,7 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration implemen public void resetLayerDefaultConfig() { //clear the learning related params for all layers in the origConf and set to defaults this.setIUpdater(null); - this.setWeightInitFn(null); + this.setWeightInit(null); this.setBiasInit(Double.NaN); this.setGainInit(Double.NaN); this.regularization = null; @@ -103,9 +105,6 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration implemen @Override public BaseLayerConfiguration clone() { BaseLayerConfiguration clone = (BaseLayerConfiguration) super.clone(); - if (clone.iDropout != null) { - clone.iDropout = clone.iDropout.clone(); - } if(regularization != null){ //Regularization fields are _usually_ thread safe and immutable, but let's clone to be sure clone.regularization = new ArrayList<>(regularization.size()); @@ -170,7 +169,7 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration implemen * * @see IWeightInit */ - protected IWeightInit weightInitFn = null; + protected IWeightInit weightInit = null; /** * Bias initialization value, for layers with biases. Defaults to 0 @@ -255,7 +254,7 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration implemen * @see IWeightInit */ public T weightInit(IWeightInit weightInit) { - this.setWeightInitFn(weightInit); + this.setWeightInit(weightInit); return (T) this; } @@ -270,7 +269,7 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration implemen "Not supported!, Use weightInit(Distribution distribution) instead!"); } - this.setWeightInitFn(weightInit.getWeightInitFunction()); + this.setWeightInit(weightInit.getWeightInitFunction()); return (T) this; } @@ -508,4 +507,19 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration implemen } } + /** + * Inherit settings from the neural network for those settings that are not already set or do have + * a layer(type)-specific default.
+ * @param conf the neural net configuration to inherit parameters from + */ + @Override + public void runInheritance(@NotNull NeuralNetConfiguration conf) { + super.runInheritance(conf); + if(this.biasUpdater == null ) this.biasUpdater = conf.getBiasUpdater(); + if(this.iUpdater == null ) this.iUpdater = conf.getUpdater(); + if(this.regularizationBias == null) this.regularizationBias = conf.getRegularizationBias(); + if(this.regularization == null ) this.regularization = conf.getRegularization(); + if(this.gradientNormalization == null) this.gradientNormalization = conf.getGradientNormalization(); + } + } \ No newline at end of file diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java index 25ad6ba4b..9ef539ae9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java @@ -172,6 +172,7 @@ public class ConvolutionLayer extends FeedForwardLayer { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { setNetConfiguration(conf); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + lconf.runInheritance(); LayerValidation.assertNInNOutSet("ConvolutionLayer", getLayerName(), layerIndex, getNIn(), getNOut()); @@ -404,9 +405,10 @@ public class ConvolutionLayer extends FeedForwardLayer { /** * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more details + * Default is {@link ConvolutionMode#Truncate}. * */ - protected ConvolutionMode convolutionMode; + protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; /** * Kernel dilation. Default: {1, 1}, which is standard convolutions.
Used for implementing dilated convolutions, diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java index bfd88a62d..b1dd9856a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java @@ -62,19 +62,18 @@ public class DenseLayer extends FeedForwardLayer { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("DenseLayerConfiguration", getLayerName(), layerIndex, getNIn(), getNOut()); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + lconf.runInheritance(); org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret = new org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer(lconf, networkDataType); - if(getWeightInitFn() == null) setWeightInitFn(new WeightInitXavier()); + + if(getWeightInit() == null) setWeightInit(new WeightInitXavier()); ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java index 2ec7b654c..16aeb1acd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java @@ -217,14 +217,14 @@ public class EmbeddingSequenceLayer extends FeedForwardLayer { return this; } - @Override + public void setWeightInitFn(IWeightInit weightInit){ if(weightInit instanceof WeightInitEmbedding){ long[] shape = ((WeightInitEmbedding) weightInit).shape(); nIn(shape[0]); nOut(shape[1]); } - this.weightInitFn = weightInit; + this.weightInit = weightInit; } /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java index b0131b80d..394012c4f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java @@ -66,28 +66,29 @@ import org.nd4j.linalg.learning.regularization.Regularization; @Slf4j public abstract class LayerConfiguration implements ILayerConfiguration, Serializable, Cloneable { // ITraininableLayerConfiguration - protected String layerName = "noname"; + protected String layerName; @Getter protected List variables = new ArrayList<>(); - public void addVariable(String s) {variables.add(s);} - - protected IDropout iDropout; protected List constraints; protected IWeightNoise weightNoise; + private IDropout iDropout; /** * The type of the layer, basically defines the base class and its properties */ @Getter @Setter @NonNull private LayerType type = LayerType.UNKNOWN; - @Getter @Setter private NeuralNetConfiguration netConfiguration; + @Getter @Setter + private IActivation activationFn; public LayerConfiguration(Builder builder) { this.layerName = 
builder.layerName; this.iDropout = builder.iDropout; } + public void addVariable(String s) {variables.add(s);} + public String toJson() { throw new RuntimeException("toJson is not implemented for LayerConfiguration"); } @@ -151,6 +152,7 @@ public abstract class LayerConfiguration implements ILayerConfiguration, Seriali public LayerConfiguration getLayer() { return this; } + @Override public LayerConfiguration clone() { try { @@ -218,7 +220,6 @@ public abstract class LayerConfiguration implements ILayerConfiguration, Seriali */ public abstract void setNIn(InputType inputType, boolean override); - /** * For the given type of input to this layer, what preprocessor (if any) is required?
* Returns null if no preprocessor is required, otherwise returns an appropriate {@link @@ -263,11 +264,11 @@ public abstract class LayerConfiguration implements ILayerConfiguration, Seriali "Not supported: all layers with parameters should override this method"); } - public IUpdater getIUpdater() { throw new UnsupportedOperationException( "Not supported: all layers with parameters should override this method"); } + public void setIUpdater(IUpdater iUpdater) { log.warn("Setting an IUpdater on {} with name {} has no effect.", getClass().getSimpleName(), getLayerName()); } @@ -285,15 +286,33 @@ public abstract class LayerConfiguration implements ILayerConfiguration, Seriali this.variables.clear(); } - @Getter @Setter - private IActivation activationFn; + /** + * Inherit settings from the neural network for those settings that are not already set or do have + * a layer(type)-specific default. This implementation does not require the neural network configuration to be + * the same as the one returned from this layer's {@link #getNetConfiguration()}. + * + * @param conf a neural net configuration to inherit parameters from + * + */ + public void runInheritance(@NonNull NeuralNetConfiguration conf) { + if(this.activationFn == null ) this.activationFn = conf.getActivation(); + if(this.iDropout == null ) this.iDropout = conf.getIdropOut(); + if(this.weightNoise == null) this.weightNoise = conf.getWeightNoise(); + } + + /** Runs {@link #runInheritance(NeuralNetConfiguration)} using the layer configuration's embedded neural net + * configuration (the one returned from {@link #getNetConfiguration()}). + */ + public void runInheritance() { + runInheritance(getNetConfiguration()); + } @SuppressWarnings("unchecked") @Getter @Setter public abstract static class Builder> { - protected String layerName = "noname"; + protected String layerName; protected List allParamConstraints; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java index 2a8afacb7..ea679c9d4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java @@ -215,7 +215,7 @@ public class LocallyConnected1D extends SameDiffLayer { public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { NeuralNetConfiguration global_conf = globalConfig.build(); if (activation == null) { - activation = SameDiffLayerUtils.fromIActivation(global_conf.getActivationFn()); + activation = SameDiffLayerUtils.fromIActivation(global_conf.getActivation()); } if (cm == null) { cm = global_conf.getConvolutionMode(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java index a33445ce7..5dd5ec62e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java @@ -232,7 +232,7 @@ public class LocallyConnected2D extends SameDiffLayer { public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { NeuralNetConfiguration gconf = globalConfig.build(); if (activation == null) { - activation =
SameDiffLayerUtils.fromIActivation(gconf.getActivationFn()); + activation = SameDiffLayerUtils.fromIActivation(gconf.getActivation()); } if (cm == null) { cm = gconf.getConvolutionMode(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java index 50647d0f1..249339df9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java @@ -117,7 +117,7 @@ public class PReLULayer extends BaseLayerConfiguration { public Builder(){ //Default to 0s, and don't inherit global default - this.weightInitFn = new WeightInitConstant(0); + this.weightInit = new WeightInitConstant(0); } /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java index 10924fd90..a1bbd9f83 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java @@ -152,7 +152,7 @@ public class RecurrentAttentionLayer extends SameDiffLayer { @Override public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { if (activation == null) { - activation = SameDiffLayerUtils.fromIActivation(globalConfig.build().getActivationFn()); + activation = SameDiffLayerUtils.fromIActivation(globalConfig.build().getActivation()); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index 18c4601c8..0d05a9486 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -196,7 +196,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { regularizationBias = bConf.getRegularizationBias(); } if (updater == null) { - updater = bConf.getIUpdater(); + updater = bConf.getUpdater(); } if (biasUpdater == null) { biasUpdater = bConf.getBiasUpdater(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java index e9bded983..accc675d0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java @@ -156,7 +156,7 @@ public abstract class SameDiffVertex extends GraphVertex implements ITraininable regularizationBias = b_conf.getRegularizationBias(); } if (updater == null) { - updater = b_conf.getIUpdater(); + updater = b_conf.getUpdater(); } if (biasUpdater == null) { biasUpdater = b_conf.getBiasUpdater(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java index 85f06a40b..a4cf67c79 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java @@ -72,6 +72,7 @@ public class VariationalAutoencoder extends BasePretrainNetwork { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.variational.VariationalAutoencoder ret = new org.deeplearning4j.nn.layers.variational.VariationalAutoencoder(lconf, networkDataType); + lconf.runInheritance(); ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java index 292b85c10..24a17c263 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java @@ -98,7 +98,7 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im protected boolean requiresWeightInitFromLegacy(LayerConfiguration[] layers){ for(LayerConfiguration l : layers){ if(l instanceof BaseLayerConfiguration - && ((BaseLayerConfiguration)l).getWeightInitFn() == null){ + && ((BaseLayerConfiguration)l).getWeightInit() == null){ return true; } } @@ -254,7 +254,7 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im d = NeuralNetConfiguration.mapper().readValue(dist, Distribution.class); } IWeightInit iwi = w.getWeightInitFunction(d); - baseLayerConfiguration.setWeightInitFn(iwi); + baseLayerConfiguration.setWeightInit(iwi); } catch (Throwable t){ log.warn("Failed to infer weight initialization from legacy JSON format",t); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java index 92399e037..9f93c43e0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java @@ -129,7 +129,7 @@ public class ComputationGraphConfigurationDeserializer } if(requiresLegacyWeightInitHandling && layers[layerIdx] instanceof BaseLayerConfiguration - && ((BaseLayerConfiguration)layers[layerIdx]).getWeightInitFn() == null){ + && ((BaseLayerConfiguration)layers[layerIdx]).getWeightInit() == null){ handleWeightInitBackwardCompatibility((BaseLayerConfiguration)layers[layerIdx], (ObjectNode)next); } @@ -160,7 +160,7 @@ public class ComputationGraphConfigurationDeserializer layerIdx++; } else if("org.deeplearning4j.nn.conf.graph.LayerVertex".equals(cls)){ if(requiresLegacyWeightInitHandling && layers[layerIdx] instanceof BaseLayerConfiguration - && ((BaseLayerConfiguration)layers[layerIdx]).getWeightInitFn() == null) { + && ((BaseLayerConfiguration)layers[layerIdx]).getWeightInit() == null) { //Post JSON format change for subclasses, but before WeightInit was made a class confNode = (ObjectNode) next.get("layerConf"); next = confNode.get("layer"); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java index 633650b95..7863aca02 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java @@ -141,7 +141,7 @@ public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserialize } if(requiresLegacyWeightInitHandling && layers[i] instanceof BaseLayerConfiguration - && ((BaseLayerConfiguration) layers[i]).getWeightInitFn() == null) { + && ((BaseLayerConfiguration) layers[i]).getWeightInit() == null) { handleWeightInitBackwardCompatibility((BaseLayerConfiguration) layers[i], on); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java index e8501f312..9774b8c07 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java @@ -88,14 +88,19 @@ public abstract class AbstractLayer impl cacheMode = layerConfiguration.getNetConfiguration().getCacheMode(); } this.dataType = dataType; + if (layerConfiguration.getNetConfiguration() == null) { + throw new RuntimeException("You cannot create a layer from a layer configuration, that is not part of any neural network configuration."); + } this.net = layerConfiguration.getNetConfiguration().getNet(); } public void addTrainingListeners(TrainingListener... listeners) { + if(listeners != null) trainingListeners.addAll(List.of(listeners)); } public void addTrainingListeners(Collection listeners) { + if(listeners != null) trainingListeners.addAll(listeners); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java index 6363c77c5..01aede19f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java @@ -77,7 +77,6 @@ public abstract class BaseLayer * INDArray params; */ public BaseLayer(LayerConfiguration conf, DataType dataType) { - super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNParamInitializer.java index f2e639e21..ad931eb8f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNParamInitializer.java @@ -21,7 +21,6 @@ package org.deeplearning4j.nn.layers.ocnn; import lombok.val; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.nn.weights.IWeightInit; @@ -154,7 +153,7 @@ public class OCNNParamInitializer extends DefaultParamInitializer { boolean initializeParameters) { org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) configuration; - IWeightInit weightInit = ocnnOutputLayer.getWeightInitFn(); + IWeightInit weightInit = ocnnOutputLayer.getWeightInit(); if 
(initializeParameters) { INDArray ret = weightInit.init(weightParamView.size(0), //Fan in weightParamView.size(1), //Fan out diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java index 3c9d2706b..bf21a6dc8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java @@ -92,7 +92,7 @@ public class VariationalAutoencoder implements Layer { protected int epochCount; @Getter @Setter @NonNull private LayerConfiguration layerConfiguration; - private @Getter @Setter Collection trainingListeners; + private @Getter @Setter Collection trainingListeners = new HashSet<>(); public VariationalAutoencoder(@NonNull LayerConfiguration layerConfiguration, DataType dataType) { this.layerConfiguration = layerConfiguration; @@ -113,6 +113,27 @@ public class VariationalAutoencoder implements Layer { .getNumSamples(); } + /** + * Replace the TrainingListeners for this model + * + * @param listeners new listeners + */ + @Override + public void addTrainingListeners(TrainingListener... listeners) { + if(listeners != null) + trainingListeners.addAll(List.of(listeners)); + } + +/** +* + * @param listeners + */ + @Override + public void addTrainingListeners(Collection listeners) { + if(listeners != null) + trainingListeners.addAll(listeners); + } + /** * Get a reference to the network this layer is part of. * @@ -1214,24 +1235,6 @@ public class VariationalAutoencoder implements Layer { //No-op for individual layers } - /** - * Replace the TrainingListeners for this model - * - * @param listeners new listeners - */ - @Override - public void addTrainingListeners(TrainingListener... 
listeners) { - trainingListeners.addAll(List.of(listeners)); - } - -/** -* - * @param listeners - */ - @Override - public void addTrainingListeners(Collection listeners) { - trainingListeners.addAll(listeners); - } @AllArgsConstructor @Data diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java index 745e77a69..b11f9f3d2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.params; import lombok.val; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Convolution3D; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.WeightInitUtil; @@ -131,7 +130,7 @@ public class Convolution3DParamInitializer extends ConvolutionParamInitializer { val weightsShape = new long[]{outputDepth, inputDepth, kernel[0], kernel[1], kernel[2]}; - return layerConf.getWeightInitFn().init(fanIn, fanOut, weightsShape, 'c', + return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c', weightView); } else { int[] kernel = layerConf.getKernelSize(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java index a8b3ce7aa..9b53e3713 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java @@ -180,7 +180,7 @@ public class ConvolutionParamInitializer extends AbstractParamInitializer { val weightsShape = new long[] {outputDepth, inputDepth, kernel[0], kernel[1]}; - return layerConf.getWeightInitFn().init(fanIn, fanOut, weightsShape, 'c', weightView); + return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c', weightView); } else { int[] kernel = layerConf.getKernelSize(); return WeightInitUtil.reshapeWeights( diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java index 7f8b8e9e6..6e2d2b128 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.params; import lombok.val; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Deconvolution3D; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.WeightInitUtil; @@ -130,7 +129,7 @@ public class Deconvolution3DParamInitializer extends ConvolutionParamInitializer //libnd4j: [kD, kH, kW, oC, iC] val weightsShape = new long[]{kernel[0], kernel[1], kernel[2], outputDepth, inputDepth}; - return layerConf.getWeightInitFn().init(fanIn, fanOut, weightsShape, 'c', weightView); + return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c', weightView); } else { int[] kernel = layerConf.getKernelSize(); return 
WeightInitUtil.reshapeWeights( diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java index 1c7ac91d9..463c24ae3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java @@ -21,7 +21,6 @@ package org.deeplearning4j.nn.params; import lombok.val; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; @@ -61,7 +60,7 @@ public class DeconvolutionParamInitializer extends ConvolutionParamInitializer { val weightsShape = new long[] {inputDepth, outputDepth, kernel[0], kernel[1]}; - INDArray weights = layerConf.getWeightInitFn().init( + INDArray weights = layerConf.getWeightInit().init( fanIn, fanOut, weightsShape, 'c', weightView); return weights; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java index c20562223..239fd20bf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java @@ -196,13 +196,13 @@ public class DefaultParamInitializer extends AbstractParamInitializer { (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf; if (initializeParameters) { - if( layerConf.getWeightInitFn() == null) { + if( layerConf.getWeightInit() == null) { // set a default and set warning - layerConf.setWeightInitFn(new WeightInitXavier()); + layerConf.setWeightInit(new WeightInitXavier()); log.warn("Weight Initializer function was not set on layer {} of class {}, it will default to {}", conf.getLayerName(), conf.getClass().getSimpleName(), WeightInitXavier.class.getSimpleName()); } - return createWeightMatrix(layerConf.getNIn(), layerConf.getNOut(), layerConf.getWeightInitFn(), + return createWeightMatrix(layerConf.getNIn(), layerConf.getNOut(), layerConf.getWeightInit(), weightParamView, true); } else { return createWeightMatrix(layerConf.getNIn(), layerConf.getNOut(), null, weightParamView, false); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java index 72f2ac6ba..d1bd00449 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java @@ -23,8 +23,6 @@ package org.deeplearning4j.nn.params; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DepthwiseConvolution2D; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.WeightInitUtil; @@ -193,7 +191,7 @@ public class DepthwiseConvolutionParamInitializer extends AbstractParamInitializ val weightsShape = new long[] 
{kernel[0], kernel[1], inputDepth, depthMultiplier}; - return layerConf.getWeightInitFn().init(fanIn, fanOut, weightsShape, 'c', + return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c', weightView); } else { int[] kernel = layerConf.getKernelSize(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java index 5239a6c2c..e74d69a1a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java @@ -22,8 +22,6 @@ package org.deeplearning4j.nn.params; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitUtil; @@ -159,14 +157,14 @@ public class GravesBidirectionalLSTMParamInitializer extends AbstractParamInitia val inputWShape = new long[]{nLast, 4 * nL}; val recurrentWShape = new long[]{nL, 4 * nL + 3}; - params.put(INPUT_WEIGHT_KEY_FORWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape, + params.put(INPUT_WEIGHT_KEY_FORWARDS, layerConf.getWeightInit().init(fanIn, fanOut, inputWShape, IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, iwF)); - params.put(RECURRENT_WEIGHT_KEY_FORWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, recurrentWShape, + params.put(RECURRENT_WEIGHT_KEY_FORWARDS, layerConf.getWeightInit().init(fanIn, fanOut, recurrentWShape, IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, rwF)); params.put(BIAS_KEY_FORWARDS, bF); - params.put(INPUT_WEIGHT_KEY_BACKWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape, + params.put(INPUT_WEIGHT_KEY_BACKWARDS, layerConf.getWeightInit().init(fanIn, fanOut, inputWShape, IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, iwR)); - params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, recurrentWShape, + params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, layerConf.getWeightInit().init(fanIn, fanOut, recurrentWShape, IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, rwR)); params.put(BIAS_KEY_BACKWARDS, bR); } else { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java index 5c59e5f7e..265027812 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java @@ -22,8 +22,6 @@ package org.deeplearning4j.nn.params; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitUtil; @@ -124,10 +122,10 @@ public class GravesLSTMParamInitializer extends AbstractParamInitializer { if(layerConf.getWeightInitFnRecurrent() != null){ rwInit = layerConf.getWeightInitFnRecurrent(); } else { - rwInit = 
layerConf.getWeightInitFn();
+            rwInit = layerConf.getWeightInit();
         }
-        params.put(INPUT_WEIGHT_KEY,layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
+        params.put(INPUT_WEIGHT_KEY,layerConf.getWeightInit().init(fanIn, fanOut, inputWShape,
                 IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, inputWeightView));
         params.put(RECURRENT_WEIGHT_KEY, rwInit.init(fanIn, fanOut, recurrentWShape,
                 IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, recurrentWeightView));
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java
index 04f12ea32..040822a8a 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java
@@ -27,7 +27,6 @@ import java.util.List;
 import java.util.Map;
 import lombok.val;
 import org.deeplearning4j.nn.api.AbstractParamInitializer;
-import org.deeplearning4j.nn.api.ParamInitializer;
 import org.deeplearning4j.nn.conf.layers.LSTM;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
 import org.deeplearning4j.nn.weights.IWeightInit;
@@ -132,10 +131,10 @@
         if(layerConf.getWeightInitFnRecurrent() != null){
             rwInit = layerConf.getWeightInitFnRecurrent();
         } else {
-            rwInit = layerConf.getWeightInitFn();
+            rwInit = layerConf.getWeightInit();
         }
-        params.put(INPUT_WEIGHT_KEY, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
+        params.put(INPUT_WEIGHT_KEY, layerConf.getWeightInit().init(fanIn, fanOut, inputWShape,
                 IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, inputWeightView));
         params.put(RECURRENT_WEIGHT_KEY, rwInit.init(fanIn, fanOut, recurrentWShape,
                 IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, recurrentWeightView));
         biasView.put(new INDArrayIndex[] {NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nL, 2 * nL)},
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java
index 32b05a04c..11d5638fe 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java
@@ -133,7 +133,7 @@
         PReLULayer layerConf = (PReLULayer) conf;
         if (initializeParameters) {
-            return layerConf.getWeightInitFn().init(layerConf.getNIn(), layerConf.getNOut(),
+            return layerConf.getWeightInit().init(layerConf.getNIn(), layerConf.getNOut(),
                     weightShape, IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, weightParamView);
         } else {
             return WeightInitUtil.reshapeWeights(weightShape, weightParamView);
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java
index 9df032560..58547886f 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java
@@ -23,8 +23,6 @@
 package org.deeplearning4j.nn.params;
 import lombok.val;
 import org.deeplearning4j.nn.api.AbstractParamInitializer;
-import org.deeplearning4j.nn.api.ParamInitializer;
-import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
 import org.deeplearning4j.nn.conf.layers.SeparableConvolution2D;
 import org.deeplearning4j.nn.weights.WeightInitUtil;
@@ -220,7 +218,7 @@ public class SeparableConvolutionParamInitializer extends AbstractParamInitializer {
             val weightsShape = new long[] {depthMultiplier, inputDepth, kernel[0], kernel[1]};

-            return layerConf.getWeightInitFn().init(fanIn, fanOut, weightsShape, 'c',
+            return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c',
                     weightView);
         } else {
             int[] kernel = layerConf.getKernelSize();
@@ -249,7 +247,7 @@
             val weightsShape = new long[] {outputDepth, depthMultiplier * inputDepth, 1, 1};

-            return layerConf.getWeightInitFn().init(fanIn, fanOut, weightsShape, 'c',
+            return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c',
                     weightView);
         } else {
             return WeightInitUtil.reshapeWeights(
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java
index 603492afa..488c00396 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java
@@ -22,8 +22,6 @@
 package org.deeplearning4j.nn.params;
 import lombok.val;
 import org.deeplearning4j.nn.api.AbstractParamInitializer;
-import org.deeplearning4j.nn.api.ParamInitializer;
-import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
 import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
 import org.deeplearning4j.nn.weights.IWeightInit;
@@ -102,14 +100,14 @@
         if (initializeParams) {
             m = getSubsets(paramsView, nIn, nOut, false, hasLayerNorm(c));
-            INDArray w = c.getWeightInitFn().init(nIn, nOut, new long[]{nIn, nOut}, 'f', m.get(WEIGHT_KEY));
+            INDArray w = c.getWeightInit().init(nIn, nOut, new long[]{nIn, nOut}, 'f', m.get(WEIGHT_KEY));
             m.put(WEIGHT_KEY, w);

             IWeightInit rwInit;
             if (c.getWeightInitFnRecurrent() != null) {
                 rwInit = c.getWeightInitFnRecurrent();
             } else {
-                rwInit = c.getWeightInitFn();
+                rwInit = c.getWeightInit();
             }

             INDArray rw = rwInit.init(nOut, nOut, new long[]{nOut, nOut}, 'f', m.get(RECURRENT_WEIGHT_KEY));
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java
index 9284843d5..362c35170 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java
@@ -21,7 +21,6 @@
 package org.deeplearning4j.nn.params;
 import lombok.val;
-import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
 import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder;
 import org.deeplearning4j.nn.weights.IWeightInit;
@@ -200,7 +199,7 @@ public class VariationalAutoencoderParamInitializer extends DefaultParamInitializer {
         int[] encoderLayerSizes = layer.getEncoderLayerSizes();
         int[] decoderLayerSizes = layer.getDecoderLayerSizes();

-        IWeightInit weightInit = layer.getWeightInitFn();
+        IWeightInit weightInit = layer.getWeightInit();

         int soFar = 0;
         for (int i = 0; i < encoderLayerSizes.length; i++) {
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java
index 5d68bd890..b62e77e83 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java
@@ -164,7 +164,7 @@ public class FineTuneConfiguration {
                 bl.setActivationFn(activationFn);
             }
             if (weightInitFn != null) {
-                bl.setWeightInitFn(weightInitFn);
+                bl.setWeightInit(weightInitFn);
             }
             if (biasInit != null) {
                 bl.setBiasInit(biasInit);
@@ -264,10 +264,10 @@
         NeuralNetConfiguration.NeuralNetConfigurationBuilder confBuilder = NeuralNetConfiguration.builder();
         if (activationFn != null) {
-            confBuilder.activationFn(activationFn);
+            confBuilder.activation(activationFn);
         }
         if (weightInitFn != null) {
-            confBuilder.weightInitFn(weightInitFn);
+            confBuilder.weightInit(weightInitFn);
         }
         if (biasInit != null) {
             confBuilder.biasInit(biasInit);
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java
index 663420f0a..708568d19 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java
@@ -462,7 +462,7 @@ public class TransferLearning {
             Preconditions.checkArgument(layerImpl instanceof FeedForwardLayer, "nInReplace can only be applide on FeedForward layers;" +
                     "got layer of type %s", layerImpl.getClass().getSimpleName());
             FeedForwardLayer layerImplF = (FeedForwardLayer) layerImpl;
-            layerImplF.setWeightInitFn(init);
+            layerImplF.setWeightInit(init);
             layerImplF.setNIn(nIn);
             long numParams = layerImpl.initializer().numParams(layerConf);
             INDArray params = Nd4j.create(origModel.getNetConfiguration().getDataType(), 1, numParams);
@@ -480,7 +480,7 @@
             Preconditions.checkArgument(layerImpl instanceof FeedForwardLayer, "nOutReplace can only be applide on FeedForward layers;" +
                     "got layer of type %s", layerImpl.getClass().getSimpleName());
             FeedForwardLayer layerImplF = (FeedForwardLayer) layerImpl;
-            layerImplF.setWeightInitFn(scheme);
+            layerImplF.setWeightInit(scheme);
             layerImplF.setNOut(nOut);
             long numParams = layerImpl.initializer().numParams(layerConf);
             INDArray params = Nd4j.create(origModel.getNetConfiguration().getDataType(), 1, numParams);
@@ -492,7 +492,7 @@
             layerImpl = layerConf; //modify in place
             if(layerImpl instanceof FeedForwardLayer) {
                 layerImplF = (FeedForwardLayer) layerImpl;
-                layerImplF.setWeightInitFn(schemeNext);
+                layerImplF.setWeightInit(schemeNext);
                 layerImplF.setNIn(nOut);
                 numParams = layerImpl.initializer().numParams(layerConf);
                 if (numParams > 0) {
@@ -738,7 +738,7 @@
             layerImpl.resetLayerDefaultConfig();
             FeedForwardLayer layerImplF = (FeedForwardLayer) layerImpl;
-            layerImplF.setWeightInitFn(scheme);
+            layerImplF.setWeightInit(scheme);
             layerImplF.setNIn(nIn);
             if(editedVertices.contains(layerName) && editedConfigBuilder.getVertices().get(layerName) instanceof LayerVertex
@@ -767,7 +767,7 @@
             LayerConfiguration layerImpl = layerConf.clone();
             layerImpl.resetLayerDefaultConfig();
             FeedForwardLayer layerImplF = (FeedForwardLayer) layerImpl;
-            layerImplF.setWeightInitFn(scheme);
+            layerImplF.setWeightInit(scheme);
             layerImplF.setNOut(nOut);
             if(editedVertices.contains(layerName) && editedConfigBuilder.getVertices().get(layerName) instanceof LayerVertex
@@ -806,7 +806,7 @@
                 continue;
             layerImpl = layerConf.clone();
             layerImplF = (FeedForwardLayer) layerImpl;
-            layerImplF.setWeightInitFn(schemeNext);
+            layerImplF.setWeightInit(schemeNext);
             layerImplF.setNIn(nOut);
             nInFromNewConfig.put(fanoutVertexName, nOut);
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java
index cec9da44a..e7a74999c 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java
@@ -207,10 +207,11 @@ public abstract class BaseMultiLayerUpdater implements Updater
      */
     public void setStateViewArray(INDArray viewArray) {
         if(this.updaterStateViewArray == null){
-            if(viewArray == null)
+            if(viewArray == null || viewArray.length()==0)
                 return; //No op - for example, SGD and NoOp updater - i.e., no stored state
             else {
-                throw new IllegalStateException("Attempting to set updater state view array with null value");
+                // Previously: throw new IllegalStateException("Attempting to set updater state view array with null value");
+                // A null or empty incoming state view is now treated as a no-op rather than an error.
             }
         }
         if (this.updaterStateViewArray.length() != viewArray.length())
@@ -296,7 +297,7 @@
         //PRE apply (gradient clipping, etc): done on a per-layer basis
         for (Map.Entry entry : layerGradients.entrySet()) {
             String layerName = entry.getKey();
-            ITrainableLayer layer = layersByName.get(layerName);
+            ITrainableLayer layer = layersByName.get(layerName); // TODO: layer names may not be unique; lookup by name could be ambiguous
             preApply(layer, layerGradients.get(layerName), iteration);
         }
diff --git a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java
index 0b7ce4627..7b5176670 100644
--- a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java
+++ b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java
@@ -29,7 +29,6 @@ import org.apache.commons.lang3.RandomUtils;
 import org.deeplearning4j.datasets.iterator.FloatsDataSetIterator;
 import org.deeplearning4j.nn.conf.GradientNormalization;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
-import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.ActivationLayer;
 import org.deeplearning4j.nn.conf.layers.DenseLayer;
 import org.deeplearning4j.nn.conf.layers.OutputLayer;
@@ -39,7 +38,6 @@ import org.deeplearning4j.optimize.listeners.ScoreToChartListener;
 import org.junit.jupiter.api.Test;
 import org.nd4j.common.primitives.Pair;
 import org.nd4j.linalg.activations.Activation;
-import org.nd4j.linalg.activations.impl.ActivationIdentity;
 import org.nd4j.linalg.activations.impl.ActivationLReLU;
 import org.nd4j.linalg.activations.impl.ActivationSigmoid;
 import org.nd4j.linalg.learning.config.Adam;
@@ -85,8 +83,8 @@ class dnnTest {
         .updater(Adam.builder().learningRate(0.0002).beta1(0.5).build())
         .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
         .gradientNormalizationThreshold(100)
-        .weightInitFn(new WeightInitXavier())
-        .activationFn(new ActivationSigmoid())
+        .weightInit(new WeightInitXavier())
+        .activation(new ActivationSigmoid())
         // .inputType(InputType.convolutional(28, 28, 1))
         .layer(new DenseLayer.Builder().nIn(6).nOut(20).build())
         .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build())
diff --git a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java
index c0df01142..89b2ceef6 100644
--- a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java
+++ b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java
@@ -1182,7 +1182,7 @@ public class TrainModule implements UIModule {
                             String.valueOf(nParams)});
                     if (nParams > 0) {
                         try {
-                            String str = JsonMappers.getMapper().writeValueAsString(bl.getWeightInitFn());
+                            String str = JsonMappers.getMapper().writeValueAsString(bl.getWeightInit());
                             layerInfoRows.add(new String[]{
                                     i18N.getMessage("train.model.layerinfotable.layerWeightInit"), str});
                         } catch (JsonProcessingException e) {
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java
index f2b07ec58..c94161a4b 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java
@@ -29,6 +29,7 @@ import org.deeplearning4j.nn.conf.ComputationGraphConfiguration.GraphBuilder;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.*;
 import org.deeplearning4j.nn.graph.ComputationGraph;
+import org.deeplearning4j.nn.weights.IWeightInit;
 import org.deeplearning4j.nn.weights.WeightInit;
 import org.deeplearning4j.zoo.ModelMetaData;
 import org.deeplearning4j.zoo.PretrainedType;
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java
index f530e0781..70abb5722 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java
@@ -176,7 +176,7 @@ public class ResNet50 extends ZooModel {
                 .activation(Activation.IDENTITY)
                 .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                 .updater(updater)
-                .weightInitFn(weightInit)
+                .weightInit(weightInit)
                 .l1(1e-7)
                 .l2(5e-5)
                 .miniBatch(true)
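
Note (editorial, not part of the patch): the hunks above consistently rename the builder and accessor API from weightInitFn(...)/activationFn(...)/getWeightInitFn() to weightInit(...)/activation(...)/getWeightInit(). The following minimal sketch illustrates the renamed calls; the class name, layer sizes, and hyperparameters are invented for illustration, and only methods already exercised in this diff are assumed to exist.

// Hypothetical usage sketch of the renamed configuration API.
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.weights.WeightInitXavier;
import org.nd4j.linalg.activations.impl.ActivationSigmoid;
import org.nd4j.linalg.learning.config.Adam;

public class WeightInitRenameExample {
    public static void main(String[] args) {
        NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
                .updater(Adam.builder().learningRate(0.0002).build())
                .weightInit(new WeightInitXavier())   // was: weightInitFn(new WeightInitXavier())
                .activation(new ActivationSigmoid())  // was: activationFn(new ActivationSigmoid())
                .layer(new DenseLayer.Builder().nIn(6).nOut(20).build())
                .layer(new OutputLayer.Builder().nIn(20).nOut(2).build())
                .build();
        // The configuration is printed only to confirm the builder chain compiles and runs.
        System.out.println(conf);
    }
}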