From 3edb90dbd11545b22b51f6eba1a0f67a4f7fee92 Mon Sep 17 00:00:00 2001
From: brian
Date: Fri, 7 Apr 2023 14:28:47 +0200
Subject: [PATCH] Playing with some new code 2

Signed-off-by: brian
---
 .../TupleStreamDataSetIteratorTest.java | 2 +- .../ModelTupleStreamIntegrationTest.java | 4 +- .../solr/handler/ModelTupleStreamTest.java | 6 +- .../solr/ltr/model/ScoringModelTest.java | 6 +- .../remote/JsonModelServerTest.java | 10 +- .../pw/SharedTrainingWrapper.java | 2 +- .../training/SharedTrainingMaster.java | 2 +- .../training/SharedTrainingWorker.java | 4 +- .../spark/parameterserver/BaseSparkTest.java | 6 +- .../train/GradientSharingTrainingTest.java | 12 +- .../spark/api/worker/NetBroadcastTuple.java | 10 +- ...eVaeReconstructionProbWithKeyFunction.java | 2 +- .../score/BaseVaeScoreWithKeyFunction.java | 2 +- .../impl/evaluation/EvaluationRunner.java | 4 +- ...VaeReconstructionErrorWithKeyFunction.java | 2 +- ...GVaeReconstructionProbWithKeyFunction.java | 2 +- .../impl/multilayer/SparkDl4jMultiLayer.java | 16 +- .../scoring/FeedForwardWithKeyFunction.java | 6 +- .../scoring/ScoreExamplesFunction.java | 4 +- .../scoring/ScoreExamplesWithKeyFunction.java | 6 +- .../scoring/ScoreFlatMapFunction.java | 4 +- ...VaeReconstructionErrorWithKeyFunction.java | 6 +- .../VaeReconstructionProbWithKeyFunction.java | 6 +- .../ParameterAveragingTrainingMaster.java | 6 +- .../deeplearning4j/spark/BaseSparkTest.java | 6 +- .../spark/TestEarlyStoppingSpark.java | 12 +- .../TestEarlyStoppingSparkCompGraph.java | 10 +- .../org/deeplearning4j/spark/TestKryo.java | 6 +- .../spark/datavec/TestPreProcessedData.java | 8 +- .../spark/impl/TestKryoWarning.java | 6 +- .../impl/customlayer/TestCustomLayer.java | 6 +- .../impl/graph/TestSparkComputationGraph.java | 10 +- .../spark/impl/misc/TestFrozenLayers.java | 4 +- .../impl/multilayer/TestMiscFunctions.java | 12 +- .../multilayer/TestSparkDl4jMultiLayer.java | 4 +- ...arameterAveragingSparkVsSingleMachine.java | 16 +- ...TestSparkMultiLayerParameterAveraging.java | 52 +- .../stats/TestTrainingStatsCollection.java | 4 +- .../spark/ui/TestListeners.java | 4 +- .../network/MultiLayerNetworkHandler.java | 6 +- .../ActorCriticFactoryCompGraphStdConv.java | 2 +- .../ActorCriticFactoryCompGraphStdDense.java | 2 +- .../ActorCriticFactorySeparateStdDense.java | 10 +- .../rl4j/network/ac/ActorCriticSeparate.java | 10 +- .../deeplearning4j/rl4j/network/dqn/DQN.java | 6 +- .../rl4j/network/dqn/DQNFactoryStdConv.java | 8 +- .../rl4j/network/dqn/DQNFactoryStdDense.java | 6 +- .../org/deeplearning4j/rl4j/NStepRnn.java | 2 +- .../deeplearning4j/rl4j/RobotLakeExample.java | 2 +- .../org/deeplearning4j/rl4j/TMazeExample.java | 2 +- .../network/MultiLayerNetworkHandlerTest.java | 8 +- .../rl4j/policy/PolicyTest.java | 4 +- README.md | 6 +- .../src/test/java/net/brutex/gan/App.java | 76 +- .../src/test/java/net/brutex/gan/GAN.java | 15 +- .../net/brutex/gan/MnistDCGANExample.java | 10 +- .../java/net/brutex/gan/MnistSimpleGAN.java | 9 +- .../test/java/net/brutex/spark/BrianTest.java | 7 +- .../java/net/brutex/spark/BrianTest2.java | 5 +- .../java/net/brutex/spark/TestServer.java | 18 +- .../java/net/brutex/spark/TestServer2.java | 8 +- .../IntegrationTestBaselineGenerator.java | 10 +- .../integration/IntegrationTestRunner.java | 40 +- .../deeplearning4j/integration/TestCase.java | 4 +- .../deeplearning4j/integration/TestUtils.java | 8 +- .../testcases/dl4j/CNN1DTestCases.java | 4 +- .../testcases/dl4j/CNN2DTestCases.java | 20 +- .../testcases/dl4j/CNN3DTestCases.java | 7 +- 
.../testcases/dl4j/MLPTestCases.java | 11 +- .../testcases/dl4j/RNNTestCases.java | 18 +- .../testcases/dl4j/UnsupervisedTestCases.java | 4 +- build.gradle | 5 +- .../net/brutex/ai/dnn/core/util/ANSI.java | 52 + .../listener/SystemInfoFilePrintListener.java | 16 +- .../listener/SystemInfoPrintListener.java | 16 +- .../core/util/ModelGuesser.java | 14 +- .../LayerHelperValidationUtil.java | 15 +- .../java/org/deeplearning4j/RandomTests.java | 8 +- .../java/org/deeplearning4j/TestUtils.java | 12 +- .../iterator/DataSetIteratorTest.java | 15 +- .../earlystopping/TestEarlyStopping.java | 81 +- .../TestEarlyStoppingCompGraph.java | 22 +- .../org/deeplearning4j/eval/EvalTest.java | 28 +- .../eval/EvaluationToolsTests.java | 5 +- .../java/org/deeplearning4j/eval/ROCTest.java | 6 +- .../eval/RegressionEvalTest.java | 5 +- .../exceptions/TestInvalidConfigurations.java | 41 +- .../exceptions/TestInvalidInput.java | 29 +- .../gradientcheck/AttentionLayerTest.java | 25 +- .../gradientcheck/BNGradientCheckTest.java | 76 +- .../gradientcheck/CNN1DGradientCheckTest.java | 37 +- .../gradientcheck/CNN3DGradientCheckTest.java | 38 +- .../gradientcheck/CNNGradientCheckTest.java | 110 +- .../CapsnetGradientCheckTest.java | 10 +- .../gradientcheck/DropoutGradientCheck.java | 13 +- .../GlobalPoolingGradientCheckTests.java | 13 +- .../gradientcheck/GradientCheckTests.java | 27 +- .../GradientCheckTestsComputationGraph.java | 58 +- .../GradientCheckTestsMasking.java | 19 +- .../gradientcheck/LRNGradientCheckTests.java | 6 +- .../gradientcheck/LSTMGradientCheckTests.java | 46 +- .../LossFunctionGradientCheck.java | 9 +- .../NoBiasGradientCheckTests.java | 13 +- .../OutputLayerGradientChecks.java | 13 +- .../gradientcheck/RnnGradientChecks.java | 13 +- .../UtilLayerGradientChecks.java | 15 +- .../gradientcheck/VaeGradientCheckTests.java | 11 +- .../gradientcheck/YoloGradientCheckTests.java | 8 +- .../ComputationGraphConfigurationTest.java | 30 +- .../org/deeplearning4j/nn/conf/JsonTest.java | 6 +- .../MultiLayerNeuralNetConfigurationTest.java | 728 ++--- .../MultiNeuralNetConfLayerBuilderTest.java | 16 +- .../nn/conf/NeuralNetConfigurationTest.java | 60 +- .../nn/conf/constraints/TestConstraints.java | 41 +- .../nn/conf/dropout/TestDropout.java | 39 +- .../nn/conf/graph/ElementWiseVertexTest.java | 12 +- .../nn/conf/graph/ShiftVertexTest.java | 4 +- .../nn/conf/layers/LayerBuilderTest.java | 14 +- .../nn/conf/layers/LayerConfigTest.java | 61 +- .../layers/LayerConfigValidationTest.java | 49 +- .../conf/preprocessor/CNNProcessorTest.java | 43 +- .../preprocessor/CustomPreprocessorTest.java | 15 +- .../conf/preprocessor/TestPreProcessors.java | 47 +- .../nn/conf/weightnoise/TestWeightNoise.java | 29 +- .../deeplearning4j/nn/dtypes/DTypeTests.java | 86 +- .../nn/graph/ComputationGraphTestRNN.java | 32 +- .../nn/graph/TestCompGraphCNN.java | 4 +- .../nn/graph/TestCompGraphUnsupervised.java | 7 +- .../nn/graph/TestComputationGraphNetwork.java | 162 +- .../nn/graph/TestSetGetParameters.java | 2 +- .../nn/graph/TestVariableLengthTSCG.java | 10 +- .../nn/graph/graphnodes/TestGraphNodes.java | 10 +- .../nn/layers/ActivationLayerTest.java | 59 +- .../nn/layers/AutoEncoderTest.java | 2 +- .../nn/layers/BaseLayerTest.java | 9 +- .../nn/layers/CacheModeTest.java | 20 +- .../nn/layers/CenterLossOutputLayerTest.java | 4 +- .../nn/layers/DropoutLayerTest.java | 27 +- .../nn/layers/FrozenLayerTest.java | 84 +- .../layers/FrozenLayerWithBackpropTest.java | 28 +- .../nn/layers/OutputLayerTest.java | 49 +- 
.../nn/layers/RepeatVectorTest.java | 6 +- .../deeplearning4j/nn/layers/SeedTest.java | 6 +- .../deeplearning4j/nn/layers/TestDropout.java | 8 +- .../nn/layers/capsule/CapsNetMNISTTest.java | 5 +- .../nn/layers/capsule/CapsuleLayerTest.java | 5 +- .../capsule/CapsuleStrengthLayerTest.java | 5 +- .../layers/capsule/PrimaryCapsulesTest.java | 5 +- .../convolution/ConvDataFormatTests.java | 22 +- .../layers/convolution/Convolution3DTest.java | 8 +- .../ConvolutionLayerSetupTest.java | 190 +- .../convolution/ConvolutionLayerTest.java | 130 +- .../LocallyConnectedLayerTest.java | 16 +- .../layers/convolution/SpaceToDepthTest.java | 6 +- .../convolution/SubsamplingLayerTest.java | 38 +- .../convolution/TestConvolutionModes.java | 39 +- .../layers/convolution/Upsampling1DTest.java | 6 +- .../layers/convolution/Upsampling2DTest.java | 6 +- .../layers/custom/TestCustomActivation.java | 13 +- .../nn/layers/custom/TestCustomLayers.java | 37 +- .../custom/testclasses/CustomLayer.java | 4 +- .../custom/testclasses/CustomOutputLayer.java | 5 +- .../layers/feedforward/dense/DenseTest.java | 9 +- .../embedding/EmbeddingLayerTest.java | 72 +- .../normalization/BatchNormalizationTest.java | 55 +- .../normalization/LocalResponseTest.java | 11 +- .../objdetect/TestYolo2OutputLayer.java | 11 +- .../nn/layers/ocnn/OCNNOutputLayerTest.java | 19 +- .../pooling/GlobalPoolingMaskingTests.java | 17 +- .../layers/recurrent/BidirectionalTest.java | 22 +- .../GravesBidirectionalLSTMTest.java | 59 +- .../nn/layers/recurrent/GravesLSTMTest.java | 27 +- .../layers/recurrent/MaskZeroLayerTest.java | 6 +- .../layers/recurrent/RnnDataFormatTests.java | 6 +- .../recurrent/TestLastTimeStepLayer.java | 4 +- .../recurrent/TestRecurrentWeightInit.java | 2 +- .../nn/layers/recurrent/TestRnnLayers.java | 24 +- .../nn/layers/recurrent/TestSimpleRnn.java | 5 +- .../layers/recurrent/TestTimeDistributed.java | 17 +- .../samediff/SameDiffCustomLayerTests.java | 7 +- .../nn/layers/samediff/TestSameDiffConv.java | 15 +- .../nn/layers/samediff/TestSameDiffDense.java | 39 +- .../samediff/TestSameDiffDenseVertex.java | 4 +- .../layers/samediff/TestSameDiffLambda.java | 8 +- .../layers/samediff/TestSameDiffOutput.java | 11 +- .../testlayers/MinimalSameDiffDense.java | 2 +- .../samediff/testlayers/SameDiffConv.java | 7 +- .../samediff/testlayers/SameDiffDense.java | 5 +- .../testlayers/SameDiffMSEOutputLayer.java | 2 +- .../nn/layers/variational/TestVAE.java | 46 +- .../nn/misc/CloseNetworkTests.java | 7 +- .../deeplearning4j/nn/misc/LargeNetTest.java | 8 +- .../deeplearning4j/nn/misc/TestLrChanges.java | 36 +- .../nn/misc/TestMemoryReports.java | 21 +- .../nn/misc/TestNetConversion.java | 13 +- .../nn/misc/WorkspaceTests.java | 58 +- .../nn/mkldnn/ValidateMKLDNN.java | 15 +- .../nn/multilayer/BackPropMLPTest.java | 11 +- .../nn/multilayer/MultiLayerTest.java | 2745 +++++++++-------- .../nn/multilayer/MultiLayerTestRNN.java | 62 +- .../nn/multilayer/TestMasking.java | 15 +- .../nn/multilayer/TestSetGetParameters.java | 21 +- .../nn/multilayer/TestVariableLengthTS.java | 24 +- .../rl/TestMultiModelGradientApplication.java | 13 +- .../nn/transferlearning/TestFrozenLayers.java | 11 +- .../TestTransferLearningModelSerializer.java | 15 +- .../TransferLearningCompGraphTest.java | 35 +- .../TransferLearningComplex.java | 15 +- .../TransferLearningHelperTest.java | 46 +- .../TransferLearningMLNTest.java | 134 +- .../nn/updater/TestGradientNormalization.java | 30 +- .../nn/updater/TestUpdaters.java | 83 +- .../nn/updater/custom/TestCustomUpdater.java | 
7 +- .../nn/weights/WeightInitIdentityTest.java | 6 +- .../solver/BackTrackLineSearchTest.java | 15 +- .../optimize/solver/TestOptimizers.java | 51 +- .../listener/TestCheckpointListener.java | 3 +- .../listener/TestFailureListener.java | 7 +- .../optimizer/listener/TestListeners.java | 27 +- .../parallelism/RandomTests.java | 13 +- .../listener/TestSystemInfoPrintListener.java | 3 +- .../regressiontest/MiscRegressionTests.java | 9 +- .../regressiontest/RegressionTest050.java | 14 +- .../regressiontest/RegressionTest060.java | 24 +- .../regressiontest/RegressionTest071.java | 24 +- .../regressiontest/RegressionTest080.java | 24 +- .../regressiontest/RegressionTest100a.java | 20 +- .../regressiontest/RegressionTest100b3.java | 22 +- .../regressiontest/RegressionTest100b4.java | 50 +- .../regressiontest/RegressionTest100b6.java | 50 +- .../customlayer100a/CustomLayer.java | 4 +- .../customlayer100a/CustomLayerImpl.java | 6 +- .../CompareTrainingImplementations.java | 7 +- .../util/CrashReportingUtilTest.java | 7 +- .../deeplearning4j/util/ModelGuesserTest.java | 27 +- .../util/ModelSerializerTest.java | 29 +- .../util/ModelValidatorTests.java | 5 +- .../nn/modelimport/keras/KerasLayer.java | 21 +- .../nn/modelimport/keras/KerasModel.java | 14 +- .../modelimport/keras/KerasModelImport.java | 10 +- .../keras/KerasSequentialModel.java | 40 +- .../modelimport/keras/layers/TFOpLayer.java | 8 +- .../keras/layers/TFOpLayerImpl.java | 3 +- .../keras/layers/recurrent/KerasLSTM.java | 2 +- .../layers/recurrent/KerasSimpleRnn.java | 4 +- .../layers/wrappers/KerasBidirectional.java | 11 +- .../keras/utils/KerasLayerUtils.java | 8 +- .../keras/utils/KerasModelUtils.java | 6 +- .../configurations/FullModelComparisons.java | 5 +- .../Keras1ModelConfigurationTest.java | 6 +- .../Keras2ModelConfigurationTest.java | 11 +- .../configurations/KerasModelImportTest.java | 12 +- .../keras/e2e/KerasCustomLayerTest.java | 3 +- .../keras/e2e/KerasModelEndToEndTest.java | 10 +- .../models/word2vec/Word2VecTestsSmall.java | 6 +- cavis-dnn/cavis-dnn-nn/build.gradle | 3 +- .../java/net/brutex/ai/dnn/api/Animal.java | 68 + .../ai/dnn/api/IActivationFunction.java | 57 + .../java/net/brutex/ai/dnn/api/IModel.java | 244 +- .../net/brutex/ai/dnn/api/INeuralNetwork.java | 122 - .../dnn/api/INeuralNetworkConfiguration.java | 6 +- .../java/net/brutex/ai/dnn/api/IUnit.java | 47 + .../java/net/brutex/ai/dnn/api/LayerType.java | 52 + .../main/java/net/brutex/ai/dnn/api/NN.java | 42 + .../dnn/conf/NeuralNetworkConfiguration.java | 705 ----- .../conf/layer/DenseLayerConfiguration.java | 62 - .../layer/FeedForwardLayerConfiguration.java | 99 - .../dnn/networks/ArtificialNeuralNetwork.java | 52 +- .../EarlyStoppingConfiguration.java | 6 +- .../EarlyStoppingModelSaver.java | 4 +- .../earlystopping/EarlyStoppingResult.java | 4 +- .../listener/EarlyStoppingListener.java | 4 +- .../saver/InMemoryModelSaver.java | 4 +- .../scorecalc/AutoencoderScoreCalculator.java | 12 +- .../ClassificationScoreCalculator.java | 4 +- .../scorecalc/DataSetLossCalculator.java | 10 +- .../scorecalc/ROCScoreCalculator.java | 4 +- .../scorecalc/RegressionScoreCalculator.java | 4 +- .../scorecalc/ScoreCalculator.java | 4 +- .../VAEReconErrorScoreCalculator.java | 12 +- .../VAEReconProbScoreCalculator.java | 12 +- .../base/BaseIEvaluationScoreCalculator.java | 4 +- .../scorecalc/base/BaseScoreCalculator.java | 4 +- .../trainer/BaseEarlyStoppingTrainer.java | 6 +- .../trainer/EarlyStoppingTrainer.java | 5 +- .../trainer/IEarlyStoppingTrainer.java | 4 +- 
.../gradientcheck/GradientCheckUtil.java | 41 +- .../nn/adapters/YoloModelAdapter.java | 4 +- .../nn/api/AbstractParamInitializer.java} | 22 +- .../org/deeplearning4j/nn/api/Classifier.java | 3 +- .../java/org/deeplearning4j/nn/api/Layer.java | 41 +- .../java/org/deeplearning4j/nn/api/Model.java | 237 -- .../deeplearning4j/nn/api/ModelAdapter.java | 3 +- .../nn/api/ParamInitializer.java | 24 +- .../org/deeplearning4j/nn/api/Trainable.java | 25 +- .../nn/api/layers/RecurrentLayer.java | 2 - .../conf/ComputationGraphConfiguration.java | 85 +- .../nn/conf/MultiLayerConfiguration.java | 841 ----- .../NeuralNetBaseBuilderConfiguration.java | 1021 ++++++ .../nn/conf/NeuralNetConfiguration.java | 2163 +++++++------ .../nn/conf/constraint/BaseConstraint.java | 4 +- .../nn/conf/graph/LayerVertex.java | 38 +- .../nn/conf/layers/ActivationLayer.java | 13 +- .../nn/conf/layers/AutoEncoder.java | 9 +- .../nn/conf/layers/BaseLayer.java | 5 +- .../nn/conf/layers/BaseUpsamplingLayer.java | 4 +- .../nn/conf/layers/BatchNormalization.java | 12 +- .../nn/conf/layers/CapsuleLayer.java | 2 +- .../nn/conf/layers/CapsuleStrengthLayer.java | 2 +- .../nn/conf/layers/CenterLossOutputLayer.java | 10 +- .../nn/conf/layers/Cnn3DLossLayer.java | 8 +- .../nn/conf/layers/CnnLossLayer.java | 8 +- .../nn/conf/layers/Convolution1DLayer.java | 9 +- .../nn/conf/layers/Convolution3D.java | 8 +- .../nn/conf/layers/ConvolutionLayer.java | 11 +- .../nn/conf/layers/Deconvolution2D.java | 9 +- .../nn/conf/layers/Deconvolution3D.java | 10 +- .../nn/conf/layers/DenseLayer.java | 15 +- .../conf/layers/DepthwiseConvolution2D.java | 8 +- .../nn/conf/layers/DropoutLayer.java | 13 +- .../nn/conf/layers/EmbeddingLayer.java | 9 +- .../conf/layers/EmbeddingSequenceLayer.java | 8 +- .../nn/conf/layers/FeedForwardLayer.java | 3 + .../nn/conf/layers/GlobalPoolingLayer.java | 10 +- .../conf/layers/GravesBidirectionalLSTM.java | 10 +- .../nn/conf/layers/GravesLSTM.java | 10 +- .../deeplearning4j/nn/conf/layers/LSTM.java | 9 +- .../{Layer.java => LayerConfiguration.java} | 64 +- .../nn/conf/layers/LayerValidation.java | 6 +- .../layers/LocalResponseNormalization.java | 14 +- .../nn/conf/layers/LocallyConnected1D.java | 7 +- .../nn/conf/layers/LocallyConnected2D.java | 7 +- .../nn/conf/layers/LossLayer.java | 8 +- .../nn/conf/layers/NoParamLayer.java | 5 +- .../nn/conf/layers/OutputLayer.java | 7 +- .../nn/conf/layers/PReLULayer.java | 7 +- .../nn/conf/layers/PrimaryCapsules.java | 2 +- .../conf/layers/RecurrentAttentionLayer.java | 4 +- .../nn/conf/layers/RnnLossLayer.java | 9 +- .../nn/conf/layers/RnnOutputLayer.java | 7 +- .../conf/layers/SeparableConvolution2D.java | 9 +- .../nn/conf/layers/SpaceToBatchLayer.java | 10 +- .../nn/conf/layers/SpaceToDepthLayer.java | 10 +- .../nn/conf/layers/Subsampling1DLayer.java | 8 +- .../nn/conf/layers/Subsampling3DLayer.java | 10 +- .../nn/conf/layers/SubsamplingLayer.java | 10 +- .../nn/conf/layers/Upsampling1D.java | 9 +- .../nn/conf/layers/Upsampling2D.java | 8 +- .../nn/conf/layers/Upsampling3D.java | 10 +- .../nn/conf/layers/ZeroPadding1DLayer.java | 10 +- .../nn/conf/layers/ZeroPadding3DLayer.java | 10 +- .../nn/conf/layers/ZeroPaddingLayer.java | 10 +- .../conf/layers/convolutional/Cropping1D.java | 12 +- .../conf/layers/convolutional/Cropping2D.java | 12 +- .../conf/layers/convolutional/Cropping3D.java | 12 +- .../misc/ElementWiseMultiplicationLayer.java | 10 +- .../nn/conf/layers/misc/FrozenLayer.java | 66 +- .../layers/misc/FrozenLayerWithBackprop.java | 25 +- 
.../nn/conf/layers/misc/RepeatVector.java | 9 +- .../layers/objdetect/Yolo2OutputLayer.java | 13 +- .../conf/layers/recurrent/Bidirectional.java | 28 +- .../conf/layers/recurrent/LastTimeStep.java | 9 +- .../nn/conf/layers/recurrent/SimpleRnn.java | 8 +- .../layers/recurrent/TimeDistributed.java | 10 +- .../samediff/AbstractSameDiffLayer.java | 23 +- .../conf/layers/samediff/SameDiffLayer.java | 10 +- .../layers/samediff/SameDiffOutputLayer.java | 9 +- .../conf/layers/samediff/SameDiffVertex.java | 19 +- .../nn/conf/layers/util/MaskLayer.java | 8 +- .../nn/conf/layers/util/MaskZeroLayer.java | 14 +- .../variational/VariationalAutoencoder.java | 8 +- .../conf/layers/wrapper/BaseWrapperLayer.java | 26 +- .../nn/conf/ocnn/OCNNOutputLayer.java | 8 +- .../conf/serde/BaseNetConfigDeserializer.java | 28 +- ...utationGraphConfigurationDeserializer.java | 14 +- .../nn/conf/serde/JsonMappers.java | 6 +- ...> NeuralNetConfigurationDeserializer.java} | 25 +- .../conf/serde/legacy/LegacyJsonFormat.java | 2 +- .../nn/conf/weightnoise/DropConnect.java | 6 +- .../nn/conf/weightnoise/WeightNoise.java | 6 +- .../nn/graph/ComputationGraph.java | 124 +- .../nn/graph/vertex/BaseGraphVertex.java | 12 +- .../nn/graph/vertex/BaseWrapperVertex.java | 4 +- .../nn/graph/vertex/GraphVertex.java | 2 +- .../nn/graph/vertex/impl/FrozenVertex.java | 21 + .../nn/graph/vertex/impl/LayerVertex.java | 6 +- .../nn/layers/AbstractLayer.java | 1133 ++++--- .../nn/layers/ActivationLayer.java | 3 +- .../deeplearning4j/nn/layers/BaseLayer.java | 968 +++--- .../nn/layers/BaseOutputLayer.java | 9 +- .../nn/layers/BasePretrainNetwork.java | 30 +- .../nn/layers/DropoutLayer.java | 3 +- .../deeplearning4j/nn/layers/FrozenLayer.java | 6 +- .../nn/layers/FrozenLayerWithBackprop.java | 2 +- .../deeplearning4j/nn/layers/LossLayer.java | 3 +- .../deeplearning4j/nn/layers/OutputLayer.java | 3 +- .../nn/layers/RepeatVector.java | 3 +- .../nn/layers/convolution/Cnn3DLossLayer.java | 15 +- .../nn/layers/convolution/CnnLossLayer.java | 3 +- .../convolution/Convolution1DLayer.java | 9 +- .../convolution/Convolution3DLayer.java | 7 +- .../layers/convolution/ConvolutionLayer.java | 15 +- .../layers/convolution/Cropping1DLayer.java | 8 +- .../layers/convolution/Cropping2DLayer.java | 8 +- .../layers/convolution/Cropping3DLayer.java | 7 +- .../convolution/Deconvolution2DLayer.java | 7 +- .../convolution/Deconvolution3DLayer.java | 5 +- .../DepthwiseConvolution2DLayer.java | 7 +- .../SeparableConvolution2DLayer.java | 7 +- .../nn/layers/convolution/SpaceToBatch.java | 3 +- .../nn/layers/convolution/SpaceToDepth.java | 3 +- .../convolution/ZeroPadding1DLayer.java | 7 +- .../convolution/ZeroPadding3DLayer.java | 7 +- .../layers/convolution/ZeroPaddingLayer.java | 5 +- .../subsampling/Subsampling1DLayer.java | 3 +- .../subsampling/Subsampling3DLayer.java | 5 +- .../subsampling/SubsamplingLayer.java | 5 +- .../convolution/upsampling/Upsampling1D.java | 5 +- .../convolution/upsampling/Upsampling2D.java | 3 +- .../convolution/upsampling/Upsampling3D.java | 3 +- .../nn/layers/feedforward/PReLU.java | 3 +- .../feedforward/autoencoder/AutoEncoder.java | 3 +- .../layers/feedforward/dense/DenseLayer.java | 3 +- .../ElementWiseMultiplicationLayer.java | 5 +- .../feedforward/embedding/EmbeddingLayer.java | 3 +- .../embedding/EmbeddingSequenceLayer.java | 3 +- .../nn/layers/mkldnn/MKLDNNLSTMHelper.java | 2 +- .../normalization/BatchNormalization.java | 14 +- .../LocalResponseNormalization.java | 6 +- .../nn/layers/objdetect/Yolo2OutputLayer.java | 3 +- 
.../nn/layers/ocnn/OCNNOutputLayer.java | 13 +- .../nn/layers/ocnn/OCNNParamInitializer.java | 32 +- .../nn/layers/pooling/GlobalPoolingLayer.java | 18 +- .../layers/recurrent/BaseRecurrentLayer.java | 3 +- .../layers/recurrent/BidirectionalLayer.java | 137 +- .../recurrent/GravesBidirectionalLSTM.java | 15 +- .../nn/layers/recurrent/GravesLSTM.java | 7 +- .../nn/layers/recurrent/LSTM.java | 8 +- .../nn/layers/recurrent/LSTMHelpers.java | 2 +- .../layers/recurrent/LastTimeStepLayer.java | 4 +- .../nn/layers/recurrent/RnnLossLayer.java | 3 +- .../nn/layers/recurrent/RnnOutputLayer.java | 3 +- .../nn/layers/recurrent/SimpleRnn.java | 3 +- .../layers/samediff/SameDiffGraphVertex.java | 2 +- .../nn/layers/samediff/SameDiffLayer.java | 13 +- .../layers/samediff/SameDiffOutputLayer.java | 13 +- .../training/CenterLossOutputLayer.java | 11 +- .../nn/layers/util/MaskLayer.java | 3 +- .../variational/VariationalAutoencoder.java | 169 +- .../nn/layers/wrapper/BaseWrapperLayer.java | 489 +-- .../nn/multilayer/MultiLayerNetwork.java | 563 ++-- .../BatchNormalizationParamInitializer.java | 50 +- .../params/BidirectionalParamInitializer.java | 48 +- .../nn/params/CenterLossParamInitializer.java | 18 +- .../params/Convolution3DParamInitializer.java | 22 +- .../params/ConvolutionParamInitializer.java | 50 +- .../Deconvolution3DParamInitializer.java | 23 +- .../params/DeconvolutionParamInitializer.java | 9 +- .../nn/params/DefaultParamInitializer.java | 74 +- .../DepthwiseConvolutionParamInitializer.java | 40 +- .../params/ElementWiseParamInitializer.java | 16 +- .../nn/params/EmptyParamInitializer.java | 26 +- .../params/FrozenLayerParamInitializer.java | 120 +- ...ozenLayerWithBackpropParamInitializer.java | 42 +- ...avesBidirectionalLSTMParamInitializer.java | 30 +- .../nn/params/GravesLSTMParamInitializer.java | 30 +- .../nn/params/LSTMParamInitializer.java | 38 +- .../nn/params/PReLUParamInitializer.java | 35 +- .../nn/params/PretrainParamInitializer.java | 17 +- .../nn/params/SameDiffParamInitializer.java | 30 +- .../SeparableConvolutionParamInitializer.java | 44 +- .../nn/params/SimpleRnnParamInitializer.java | 33 +- ...ariationalAutoencoderParamInitializer.java | 24 +- .../params/WrapperLayerParamInitializer.java | 56 +- .../FineTuneConfiguration.java | 1479 ++++----- .../nn/transferlearning/TransferLearning.java | 151 +- .../TransferLearningHelper.java | 15 +- .../nn/updater/BaseMultiLayerUpdater.java | 10 +- .../nn/updater/LayerUpdater.java | 4 +- .../nn/updater/MultiLayerUpdater.java | 2 +- .../nn/updater/UpdaterCreator.java | 4 +- .../graph/ComputationGraphUpdater.java | 5 +- .../org/deeplearning4j/optimize/Solver.java | 8 +- .../optimize/api/BaseTrainingListener.java | 16 +- .../optimize/api/ConvexOptimizer.java | 4 +- .../optimize/api/IterationListener.java | 4 +- .../optimize/api/TrainingListener.java | 24 +- .../listeners/CheckpointListener.java | 24 +- .../CollectScoresIterationListener.java | 4 +- .../listeners/CollectScoresListener.java | 4 +- .../ComposableIterationListener.java | 4 +- .../listeners/EvaluativeListener.java | 12 +- .../listeners/FailureTestingListener.java | 34 +- .../listeners/PerformanceListener.java | 6 +- .../listeners/ScoreIterationListener.java | 6 +- .../listeners/ScoreToChartListener.java | 4 +- .../listeners/SleepyTrainingListener.java | 16 +- .../listeners/TimeIterationListener.java | 6 +- .../callbacks/EvaluationCallback.java | 4 +- .../callbacks/ModelSavingCallback.java | 6 +- .../optimize/solvers/BackTrackLineSearch.java | 11 +- 
.../optimize/solvers/BaseOptimizer.java | 31 +- .../optimize/solvers/ConjugateGradient.java | 4 +- .../optimize/solvers/LBFGS.java | 4 +- .../optimize/solvers/LineGradientDescent.java | 4 +- .../solvers/StochasticGradientDescent.java | 4 +- .../EncodedGradientsAccumulator.java | 4 +- .../util/Convolution1DUtils.java | 5 +- .../deeplearning4j/util/ConvolutionUtils.java | 18 +- .../util/CrashReportingUtil.java | 24 +- .../util/DL4JModelValidator.java | 12 +- .../deeplearning4j/util/ModelSerializer.java | 30 +- .../org/deeplearning4j/util/NetworkUtils.java | 35 +- .../deeplearning4j/util/OutputLayerUtil.java | 8 +- .../deeplearning4j/util/TimeSeriesUtils.java | 4 +- .../main/resources/simplelogger.properties | 22 + .../java/net/brutex/ai/dnn/api/dnnTest.java | 23 +- .../brutex/ai/dnn/conf/layer/FFLayerTest.java | 11 - .../ParameterServerTrainer.java | 10 +- .../ParameterServerTrainerContext.java | 10 +- .../ParameterServerParallelWrapperTest.java | 8 +- .../EarlyStoppingParallelTrainer.java | 8 +- .../parallelism/InplaceParallelInference.java | 26 +- .../parallelism/ParallelInference.java | 30 +- .../parallelism/ParallelWrapper.java | 9 +- .../factory/DefaultTrainerContext.java | 10 +- .../factory/SymmetricTrainerContext.java | 11 +- .../parallelism/factory/TrainerContext.java | 10 +- .../parallelism/main/ParallelWrapperMain.java | 4 +- .../parallelism/trainer/DefaultTrainer.java | 18 +- .../parallelism/trainer/SymmetricTrainer.java | 4 +- .../parallelism/trainer/Trainer.java | 10 +- .../InplaceParallelInferenceTest.java | 8 +- .../parallelism/ParallelInferenceTest.java | 25 +- .../parallelism/ParallelWrapperTest.java | 10 +- .../parallelism/TestListeners.java | 31 +- .../TestParallelEarlyStopping.java | 7 +- .../TestParallelEarlyStoppingUI.java | 3 +- .../factory/DefaultTrainerContextTest.java | 11 +- .../factory/SymmetricTrainerContextTest.java | 8 +- .../main/ParallelWrapperMainTest.java | 8 +- .../spark/api/TrainingHook.java | 10 +- .../spark/api/worker/NetBroadcastTuple.java | 10 +- .../BaseSparkEarlyStoppingTrainer.java | 4 +- ...eVaeReconstructionProbWithKeyFunction.java | 2 +- .../score/BaseVaeScoreWithKeyFunction.java | 2 +- .../impl/evaluation/EvaluationRunner.java | 10 +- .../impl/graph/SparkComputationGraph.java | 2 +- ...VaeReconstructionErrorWithKeyFunction.java | 2 +- ...GVaeReconstructionProbWithKeyFunction.java | 2 +- .../impl/multilayer/SparkDl4jMultiLayer.java | 18 +- .../scoring/FeedForwardWithKeyFunction.java | 6 +- .../scoring/ScoreExamplesFunction.java | 5 +- .../scoring/ScoreExamplesWithKeyFunction.java | 6 +- .../scoring/ScoreFlatMapFunction.java | 4 +- ...VaeReconstructionErrorWithKeyFunction.java | 9 +- .../VaeReconstructionProbWithKeyFunction.java | 6 +- .../ParameterAveragingTrainingMaster.java | 6 +- .../ParameterAveragingTrainingWorker.java | 4 +- .../deeplearning4j/spark/BaseSparkTest.java | 7 +- .../spark/TestEarlyStoppingSpark.java | 11 +- .../TestEarlyStoppingSparkCompGraph.java | 10 +- .../org/deeplearning4j/spark/TestKryo.java | 13 +- .../spark/datavec/TestPreProcessedData.java | 7 +- .../spark/impl/TestKryoWarning.java | 5 +- .../impl/customlayer/TestCustomLayer.java | 5 +- .../impl/customlayer/layer/CustomLayer.java | 4 +- .../impl/graph/TestSparkComputationGraph.java | 10 +- .../spark/impl/misc/TestFrozenLayers.java | 23 +- .../impl/multilayer/TestMiscFunctions.java | 11 +- .../multilayer/TestSparkDl4jMultiLayer.java | 4 +- ...arameterAveragingSparkVsSingleMachine.java | 16 +- ...TestSparkMultiLayerParameterAveraging.java | 63 +- 
.../stats/TestTrainingStatsCollection.java | 4 +- .../spark/ui/TestListeners.java | 3 +- .../ParameterServerTrainingHook.java | 10 +- .../pw/SharedTrainingWrapper.java | 8 +- .../training/SharedTrainingMaster.java | 2 +- .../training/SharedTrainingWorker.java | 4 +- .../spark/parameterserver/BaseSparkTest.java | 7 +- .../train/GradientSharingTrainingTest.java | 18 +- .../deeplearning4j/plot/BarnesHutTsne.java | 8 +- .../ConvolutionalIterationListener.java | 14 +- .../org/deeplearning4j/ui/ManualTests.java | 24 +- .../ui/weights/TestConvolutionalListener.java | 7 +- .../ui/model/stats/BaseStatsListener.java | 52 +- .../ui/stats/TestStatsListener.java | 7 +- .../ui/stats/TestTransferStatsCollection.java | 3 +- .../ui/module/train/TrainModule.java | 27 +- .../ui/module/train/TrainModuleUtils.java | 30 +- .../deeplearning4j/ui/TestRemoteReceiver.java | 5 +- .../org/deeplearning4j/ui/TestVertxUI.java | 14 +- .../deeplearning4j/ui/TestVertxUIManual.java | 9 +- .../ui/TestVertxUIMultiSession.java | 9 +- .../deeplearning4j/zoo/InstantiableModel.java | 6 +- .../java/org/deeplearning4j/zoo/ZooModel.java | 6 +- .../org/deeplearning4j/zoo/model/AlexNet.java | 21 +- .../deeplearning4j/zoo/model/Darknet19.java | 13 +- .../zoo/model/FaceNetNN4Small2.java | 9 +- .../zoo/model/InceptionResNetV1.java | 12 +- .../org/deeplearning4j/zoo/model/LeNet.java | 15 +- .../org/deeplearning4j/zoo/model/NASNet.java | 8 +- .../deeplearning4j/zoo/model/ResNet50.java | 13 +- .../deeplearning4j/zoo/model/SimpleCNN.java | 17 +- .../deeplearning4j/zoo/model/SqueezeNet.java | 12 +- .../zoo/model/TextGenerationLSTM.java | 15 +- .../deeplearning4j/zoo/model/TinyYOLO.java | 9 +- .../org/deeplearning4j/zoo/model/UNet.java | 9 +- .../org/deeplearning4j/zoo/model/VGG16.java | 8 +- .../org/deeplearning4j/zoo/model/VGG19.java | 10 +- .../deeplearning4j/zoo/model/Xception.java | 11 +- .../org/deeplearning4j/zoo/model/YOLO2.java | 9 +- .../deeplearning4j/zoo/TestInstantiation.java | 5 +- .../org/deeplearning4j/zoo/TestUtils.java | 2 +- 614 files changed, 12080 insertions(+), 11594 deletions(-) create mode 100644 cavis-dnn/cavis-dnn-core/src/main/java/net/brutex/ai/dnn/core/util/ANSI.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/Animal.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IActivationFunction.java delete mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetwork.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IUnit.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/LayerType.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java delete mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/NeuralNetworkConfiguration.java delete mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/DenseLayerConfiguration.java delete mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/FeedForwardLayerConfiguration.java rename cavis-dnn/cavis-dnn-nn/src/main/java/{net/brutex/ai/dnn/conf/layer/AbstractLayerConfiguration.java => org/deeplearning4j/nn/api/AbstractParamInitializer.java} (67%) delete mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Model.java delete mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/MultiLayerConfiguration.java create mode 100644 
cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java rename cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/{Layer.java => LayerConfiguration.java} (90%) rename cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/{MultiLayerConfigurationDeserializer.java => NeuralNetConfigurationDeserializer.java} (89%) create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties diff --git a/.old/deeplearning4j/deeplearning4j-dataimport-solrj/src/test/java/org/deeplearning4j/nn/dataimport/solr/client/solrj/io/stream/TupleStreamDataSetIteratorTest.java b/.old/deeplearning4j/deeplearning4j-dataimport-solrj/src/test/java/org/deeplearning4j/nn/dataimport/solr/client/solrj/io/stream/TupleStreamDataSetIteratorTest.java index 5c21d354a..67ad09bd1 100644 --- a/.old/deeplearning4j/deeplearning4j-dataimport-solrj/src/test/java/org/deeplearning4j/nn/dataimport/solr/client/solrj/io/stream/TupleStreamDataSetIteratorTest.java +++ b/.old/deeplearning4j/deeplearning4j-dataimport-solrj/src/test/java/org/deeplearning4j/nn/dataimport/solr/client/solrj/io/stream/TupleStreamDataSetIteratorTest.java @@ -205,7 +205,7 @@ public class TupleStreamDataSetIteratorTest extends SolrCloudTestCase { public void modelFitTest() throws Exception { final MultiLayerNetwork model = new MultiLayerNetwork( - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .list( new OutputLayer.Builder(LossFunction.MSE) .nIn(3) diff --git a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamIntegrationTest.java b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamIntegrationTest.java index 7c0505605..c2c260fdd 100644 --- a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamIntegrationTest.java +++ b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamIntegrationTest.java @@ -35,7 +35,7 @@ import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.common.params.ModifiableSolrParams; import org.deeplearning4j.nn.api.Model; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -153,7 +153,7 @@ public class ModelTupleStreamIntegrationTest extends SolrCloudTestCase { final int numInputs = 3; final int numOutputs = 2; - final MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + final NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list( new OutputLayer.Builder() .nIn(numInputs) diff --git a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamTest.java b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamTest.java index a638fa14a..c6a05607b 100644 --- a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamTest.java +++ 
b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamTest.java @@ -43,7 +43,7 @@ import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.handler.SolrDefaultStreamFactory; import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -242,7 +242,7 @@ public class ModelTupleStreamTest { protected Model buildMultiLayerNetworkModel(int numInputs, int numOutputs) throws Exception { - final MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + final NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list( new OutputLayer.Builder() .nIn(numInputs) @@ -274,7 +274,7 @@ public class ModelTupleStreamTest { protected Model buildComputationGraphModel(int numInputs, int numOutputs) throws Exception { - final ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + final ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("inputLayer") .addLayer("outputLayer", diff --git a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/ltr/model/ScoringModelTest.java b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/ltr/model/ScoringModelTest.java index 7f77c6c0c..1986511bb 100644 --- a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/ltr/model/ScoringModelTest.java +++ b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/ltr/model/ScoringModelTest.java @@ -42,7 +42,7 @@ import org.apache.solr.ltr.norm.Normalizer; import org.apache.solr.request.SolrQueryRequest; import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -192,7 +192,7 @@ public class ScoringModelTest { protected Model buildMultiLayerNetworkModel(int numFeatures) throws Exception { - final MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + final NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list( new OutputLayer.Builder().nIn(numFeatures).nOut(1).lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).build() ) @@ -217,7 +217,7 @@ public class ScoringModelTest { protected Model buildComputationGraphModel(int numFeatures) throws Exception { - final ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + final ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("inputLayer") .addLayer("outputLayer", diff --git a/.old/deeplearning4j/deeplearning4j-remote/deeplearning4j-json-server/src/test/java/org/deeplearning4j/remote/JsonModelServerTest.java b/.old/deeplearning4j/deeplearning4j-remote/deeplearning4j-json-server/src/test/java/org/deeplearning4j/remote/JsonModelServerTest.java index 
1de161c2e..dd75472c6 100644 --- a/.old/deeplearning4j/deeplearning4j-remote/deeplearning4j-json-server/src/test/java/org/deeplearning4j/remote/JsonModelServerTest.java +++ b/.old/deeplearning4j/deeplearning4j-remote/deeplearning4j-json-server/src/test/java/org/deeplearning4j/remote/JsonModelServerTest.java @@ -23,7 +23,7 @@ import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.MergeVertex; import org.deeplearning4j.nn.conf.layers.*; @@ -70,7 +70,7 @@ public class JsonModelServerTest extends BaseDL4JTest { private static final MultiLayerNetwork model; static { - val conf = new NeuralNetConfiguration.Builder() + val conf = NeuralNetConfiguration.builder() .seed(119) .updater(new Adam(0.119f)) .weightInit(WeightInit.XAVIER) @@ -541,7 +541,7 @@ public class JsonModelServerTest extends BaseDL4JTest { @Test public void testMlnMnist() throws Exception { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new DenseLayer.Builder().nIn(784).nOut(10).build()) .layer(new LossLayer.Builder().activation(Activation.SOFTMAX).build()) @@ -597,7 +597,7 @@ public class JsonModelServerTest extends BaseDL4JTest { @Test public void testCompGraph() throws Exception { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("input1", "input2") .addLayer("L1", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input1") @@ -652,7 +652,7 @@ public class JsonModelServerTest extends BaseDL4JTest { @Test public void testCompGraph_1() throws Exception { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd(0.01)) .graphBuilder() .addInputs("input") diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java index f3f2cee80..23bc7566d 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java @@ -425,7 +425,7 @@ public class SharedTrainingWrapper { .setTrainingWorkspaceMode(trainingConfiguration.getWorkspaceMode()); ((ComputationGraph) originalModel).setGradientsAccumulator(accumulator); } else if (model instanceof MultiLayerNetwork) { - ((MultiLayerNetwork) originalModel).getLayerWiseConfigurations() + ((MultiLayerNetwork) originalModel).getConfiguration() .setTrainingWorkspaceMode(trainingConfiguration.getWorkspaceMode()); ((MultiLayerNetwork) originalModel).setGradientsAccumulator(accumulator); } diff --git 
a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java index 2a17ab3e1..d02cb4234 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java @@ -261,7 +261,7 @@ public class SharedTrainingMaster extends BaseTrainingMaster extends BaseVa /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param useLogProbability If true: use log probability. False: use raw probability. * @param batchSize Batch size to use when scoring * @param numSamples Number of samples to use when calling {@link VariationalAutoencoder#reconstructionLogProbability(INDArray, int)} diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeScoreWithKeyFunction.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeScoreWithKeyFunction.java index 4140b8a53..cfcc93b78 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeScoreWithKeyFunction.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeScoreWithKeyFunction.java @@ -45,7 +45,7 @@ public abstract class BaseVaeScoreWithKeyFunction implements PairFlatMapFunct /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param batchSize Batch size to use when scoring */ public BaseVaeScoreWithKeyFunction(Broadcast params, Broadcast jsonConfig, int batchSize) { diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/evaluation/EvaluationRunner.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/evaluation/EvaluationRunner.java index 8550c6e3c..426682d69 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/evaluation/EvaluationRunner.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/evaluation/EvaluationRunner.java @@ -27,7 +27,7 @@ import org.deeplearning4j.datasets.iterator.IteratorDataSetIterator; import org.deeplearning4j.datasets.iterator.IteratorMultiDataSetIterator; import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.common.base.Preconditions; @@ -131,7 +131,7 @@ public class EvaluationRunner { cg.init(deviceLocalParams.get(), false); m = 
cg; } else { - MultiLayerConfiguration conf = MultiLayerConfiguration.fromJson(json.getValue()); + NeuralNetConfiguration conf = NeuralNetConfiguration.fromJson(json.getValue()); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(deviceLocalParams.get(), false); m = net; diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java index d8aadc3f1..e13b5f9b6 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java @@ -33,7 +33,7 @@ public class CGVaeReconstructionErrorWithKeyFunction extends BaseVaeScoreWith /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param batchSize Batch size to use when scoring */ public CGVaeReconstructionErrorWithKeyFunction(Broadcast params, Broadcast jsonConfig, diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java index 57c568239..e9455092c 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java @@ -33,7 +33,7 @@ public class CGVaeReconstructionProbWithKeyFunction extends BaseVaeReconstruc /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param useLogProbability If true: use log probability. False: use raw probability. 
* @param batchSize Batch size to use when scoring * @param numSamples Number of samples to use when calling {@link VariationalAutoencoder#reconstructionLogProbability(INDArray, int)} diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java index be7780f2f..054520c70 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java @@ -35,7 +35,7 @@ import org.datavec.spark.util.BroadcastHadoopConfigHolder; import org.deeplearning4j.core.loader.DataSetLoader; import org.deeplearning4j.core.loader.MultiDataSetLoader; import org.deeplearning4j.core.loader.impl.SerializedDataSetLoader; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.spark.api.TrainingMaster; @@ -80,7 +80,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { public static final int DEFAULT_ROC_THRESHOLD_STEPS = 32; public static final int DEFAULT_EVAL_WORKERS = 4; private transient JavaSparkContext sc; - private MultiLayerConfiguration conf; + private NeuralNetConfiguration conf; private MultiLayerNetwork network; private double lastScore; private int defaultEvaluationWorkers = DEFAULT_EVAL_WORKERS; @@ -104,7 +104,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { * @param sparkContext the spark context to use * @param conf the configuration of the network */ - public SparkDl4jMultiLayer(SparkContext sparkContext, MultiLayerConfiguration conf, + public SparkDl4jMultiLayer(SparkContext sparkContext, NeuralNetConfiguration conf, TrainingMaster trainingMaster) { this(new JavaSparkContext(sparkContext), initNetwork(conf), trainingMaster); } @@ -115,14 +115,14 @@ public class SparkDl4jMultiLayer extends SparkListenable { * @param sc the spark context to use * @param conf the configuration of the network */ - public SparkDl4jMultiLayer(JavaSparkContext sc, MultiLayerConfiguration conf, TrainingMaster trainingMaster) { + public SparkDl4jMultiLayer(JavaSparkContext sc, NeuralNetConfiguration conf, TrainingMaster trainingMaster) { this(sc.sc(), conf, trainingMaster); } public SparkDl4jMultiLayer(JavaSparkContext javaSparkContext, MultiLayerNetwork network, TrainingMaster trainingMaster) { sc = javaSparkContext; - this.conf = network.getLayerWiseConfigurations().clone(); + this.conf = network.getConfiguration().clone(); this.network = network; if (!network.isInitCalled()) network.init(); @@ -132,7 +132,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { SparkUtils.checkKryoConfiguration(javaSparkContext, log); } - private static MultiLayerNetwork initNetwork(MultiLayerConfiguration conf) { + private static MultiLayerNetwork initNetwork(NeuralNetConfiguration conf) { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); return net; @@ -315,8 +315,8 @@ public class SparkDl4jMultiLayer extends SparkListenable { * @return the multi layer network that was fitDataSet */ public MultiLayerNetwork fitLabeledPoint(JavaRDD 
rdd) { - int nLayers = network.getLayerWiseConfigurations().getConfs().size(); - FeedForwardLayer ffl = (FeedForwardLayer) network.getLayerWiseConfigurations().getConf(nLayers - 1).getLayer(); + int nLayers = network.getConfiguration().getConfs().size(); + FeedForwardLayer ffl = (FeedForwardLayer) network.getConfiguration().getConf(nLayers - 1).getLayer(); JavaRDD ds = MLLibUtil.fromLabeledPoint(sc, rdd, ffl.getNOut()); return fit(ds); } diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/FeedForwardWithKeyFunction.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/FeedForwardWithKeyFunction.java index 510f2e4d4..c064c81d0 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/FeedForwardWithKeyFunction.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/FeedForwardWithKeyFunction.java @@ -22,7 +22,7 @@ package org.deeplearning4j.spark.impl.multilayer.scoring; import org.apache.spark.api.java.function.PairFlatMapFunction; import org.apache.spark.broadcast.Broadcast; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.DataSetUtil; @@ -49,7 +49,7 @@ public class FeedForwardWithKeyFunction /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param batchSize Batch size to use for forward pass (use > 1 for efficiency) */ public FeedForwardWithKeyFunction(Broadcast params, Broadcast jsonConfig, int batchSize) { @@ -65,7 +65,7 @@ public class FeedForwardWithKeyFunction return Collections.emptyIterator(); } - MultiLayerNetwork network = new MultiLayerNetwork(MultiLayerConfiguration.fromJson(jsonConfig.getValue())); + MultiLayerNetwork network = new MultiLayerNetwork(NeuralNetConfiguration.fromJson(jsonConfig.getValue())); network.init(); INDArray val = params.value().unsafeDuplication(); if (val.length() != network.numParams(false)) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/ScoreExamplesFunction.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/ScoreExamplesFunction.java index 6c3878da5..a8990125d 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/ScoreExamplesFunction.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/ScoreExamplesFunction.java @@ -23,7 +23,7 @@ package org.deeplearning4j.spark.impl.multilayer.scoring; import org.apache.spark.api.java.function.DoubleFlatMapFunction; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.broadcast.Broadcast; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.api.ndarray.INDArray; import 
org.nd4j.linalg.dataset.DataSet; @@ -60,7 +60,7 @@ public class ScoreExamplesFunction implements DoubleFlatMapFunction implements PairFlatMapFunction implements PairFlatMapFunction, DataSetIterator iter = new IteratorDataSetIterator(dataSetIterator, minibatchSize); //Does batching where appropriate - MultiLayerNetwork network = new MultiLayerNetwork(MultiLayerConfiguration.fromJson(json)); + MultiLayerNetwork network = new MultiLayerNetwork(NeuralNetConfiguration.fromJson(json)); network.init(); INDArray val = params.value().unsafeDuplication(); //.value() object will be shared by all executors on each machine -> OK, as params are not modified by score function if (val.length() != network.numParams(false)) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java index 3f7c5ba6c..95c0c721e 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java @@ -22,7 +22,7 @@ package org.deeplearning4j.spark.impl.multilayer.scoring; import org.apache.spark.broadcast.Broadcast; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.layers.variational.VariationalAutoencoder; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.spark.impl.common.score.BaseVaeScoreWithKeyFunction; @@ -36,7 +36,7 @@ public class VaeReconstructionErrorWithKeyFunction extends BaseVaeScoreWithKe /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param batchSize Batch size to use when scoring */ public VaeReconstructionErrorWithKeyFunction(Broadcast params, Broadcast jsonConfig, @@ -47,7 +47,7 @@ public class VaeReconstructionErrorWithKeyFunction extends BaseVaeScoreWithKe @Override public VariationalAutoencoder getVaeLayer() { MultiLayerNetwork network = - new MultiLayerNetwork(MultiLayerConfiguration.fromJson((String) jsonConfig.getValue())); + new MultiLayerNetwork(NeuralNetConfiguration.fromJson((String) jsonConfig.getValue())); network.init(); INDArray val = ((INDArray) params.value()).unsafeDuplication(); if (val.length() != network.numParams(false)) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java index d9dd8a155..18890d020 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java @@ -22,7 +22,7 
@@ package org.deeplearning4j.spark.impl.multilayer.scoring; import org.apache.spark.broadcast.Broadcast; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.layers.variational.VariationalAutoencoder; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.spark.impl.common.score.BaseVaeReconstructionProbWithKeyFunction; @@ -34,7 +34,7 @@ public class VaeReconstructionProbWithKeyFunction extends BaseVaeReconstructi /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param useLogProbability If true: use log probability. False: use raw probability. * @param batchSize Batch size to use when scoring * @param numSamples Number of samples to use when calling {@link VariationalAutoencoder#reconstructionLogProbability(INDArray, int)} @@ -47,7 +47,7 @@ public class VaeReconstructionProbWithKeyFunction extends BaseVaeReconstructi @Override public VariationalAutoencoder getVaeLayer() { MultiLayerNetwork network = - new MultiLayerNetwork(MultiLayerConfiguration.fromJson((String) jsonConfig.getValue())); + new MultiLayerNetwork(NeuralNetConfiguration.fromJson((String) jsonConfig.getValue())); network.init(); INDArray val = ((INDArray) params.value()).unsafeDuplication(); if (val.length() != network.numParams(false)) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java index 8d8532e0b..411422884 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java @@ -41,7 +41,7 @@ import org.deeplearning4j.core.storage.StatsStorageRouter; import org.deeplearning4j.core.storage.StatsStorageRouterProvider; import org.deeplearning4j.core.storage.StorageMetaData; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.api.TrainingListener; @@ -274,7 +274,7 @@ public class ParameterAveragingTrainingMaster @Override public ParameterAveragingTrainingWorker getWorkerInstance(SparkDl4jMultiLayer network) { - NetBroadcastTuple tuple = new NetBroadcastTuple(network.getNetwork().getLayerWiseConfigurations(), + NetBroadcastTuple tuple = new NetBroadcastTuple(network.getNetwork().getConfiguration(), network.getNetwork().params(), network.getNetwork().getUpdater().getStateViewArray()); if (collectTrainingStats) @@ -726,7 +726,7 @@ public class ParameterAveragingTrainingMaster if (params != null) { //Params may be null for edge case (empty RDD) if (network != null) { - MultiLayerConfiguration conf = network.getNetwork().getLayerWiseConfigurations(); + NeuralNetConfiguration conf = network.getNetwork().getConfiguration(); int numUpdates = averagingFrequency; 
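The ParameterAveragingTrainingMaster hunk above is representative of how the iteration bookkeeping moves from getLayerWiseConfigurations() to getConfiguration(). A minimal sketch of that bookkeeping, assuming the refactored API shown in these hunks; the helper class and parameter names are illustrative only:

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;

    class IterationCountSketch {
        // After one averaging round, advance the network's iteration counter by the
        // number of minibatch updates each worker performed (the averaging frequency).
        static void advanceIterationCount(MultiLayerNetwork network, int averagingFrequency) {
            NeuralNetConfiguration conf = network.getConfiguration(); // formerly getLayerWiseConfigurations()
            conf.setIterationCount(conf.getIterationCount() + averagingFrequency);
        }
    }

The counter lives on the configuration object itself, so callers that still declare the old MultiLayerConfiguration type need the type change as well as the accessor change.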
conf.setIterationCount(conf.getIterationCount() + numUpdates); } else { diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java index e00f8d6d3..686560ffc 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java @@ -26,7 +26,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.datavec.spark.util.SerializableHadoopConfig; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.spark.impl.multilayer.SparkDl4jMultiLayer; import org.deeplearning4j.spark.impl.paramavg.ParameterAveragingTrainingMaster; @@ -129,8 +129,8 @@ public abstract class BaseSparkTest extends BaseDL4JTest implements Serializable return 4; } - protected MultiLayerConfiguration getBasicConf() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123) + protected NeuralNetConfiguration getBasicConf() { + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .updater(new Nesterovs(0.1, 0.9)).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java index ed8de3623..7154808f6 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java @@ -35,7 +35,7 @@ import org.deeplearning4j.earlystopping.termination.MaxTimeIterationTerminationC import org.deeplearning4j.earlystopping.termination.ScoreImprovementEpochTerminationCondition; import org.deeplearning4j.earlystopping.trainer.IEarlyStoppingTrainer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -68,7 +68,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { //Spark tests don't run on windows return; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) @@ -123,7 +123,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = 
NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(10.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).list() @@ -163,7 +163,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) @@ -209,7 +209,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) @@ -246,7 +246,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { //Spark tests don't run on windows return; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java index 3de17a742..76fa0e65b 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java @@ -71,7 +71,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { //Spark tests don't run on windows return; } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) @@ -124,7 +124,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(2.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") @@ -165,7 +165,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new 
Sgd(1e-6)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") @@ -213,7 +213,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") @@ -253,7 +253,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { //Spark tests don't run on windows return; } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java index 33023d605..47f1807d0 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java @@ -22,7 +22,7 @@ package org.deeplearning4j.spark; import org.apache.spark.serializer.SerializerInstance; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.conf.graph.*; @@ -68,14 +68,14 @@ public class TestKryo extends BaseSparkKryoTest { Map m = new HashMap<>(); m.put(0, 0.5); m.put(10, 0.1); - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() .updater(new Nadam(new MapSchedule(ScheduleType.ITERATION,m))).list().layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build()) .build(); testSerialization(mlc, si); - ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder() .dist(new UniformDistribution(-1, 1)) .updater(new Adam(new MapSchedule(ScheduleType.ITERATION,m))) .graphBuilder() diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java index 714c3ffb6..946f8816f 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java @@ -30,7 +30,7 @@ import org.datavec.api.records.reader.impl.csv.CSVRecordReader; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import 
org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.spark.BaseSparkTest; @@ -84,7 +84,7 @@ public class TestPreProcessedData extends BaseSparkTest { iter.next().save(f2); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) .activation(Activation.TANH).build()) @@ -134,7 +134,7 @@ public class TestPreProcessedData extends BaseSparkTest { iter.next().save(f2); } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) @@ -188,7 +188,7 @@ public class TestPreProcessedData extends BaseSparkTest { mds.save(f2); } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java index ec2195081..6aa102fb4 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java @@ -23,7 +23,7 @@ package org.deeplearning4j.spark.impl; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.spark.api.TrainingMaster; @@ -40,7 +40,7 @@ public class TestKryoWarning { try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build()) .build(); @@ -57,7 +57,7 @@ public class TestKryoWarning { try { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("0") .build(); diff --git 
a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java index b3c96333d..1b7bf1052 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java @@ -22,7 +22,7 @@ package org.deeplearning4j.spark.impl.customlayer; import com.sun.jna.Platform; import org.apache.spark.api.java.JavaRDD; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -51,8 +51,8 @@ public class TestCustomLayer extends BaseSparkTest { } //Basic test - checks whether exceptions etc are thrown with custom layers + spark //Custom layers are tested more extensively in dl4j core - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new Sgd(0.1)).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new CustomLayer(3.14159)).layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java index cc6e5f9ec..7a28146fb 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java @@ -77,7 +77,7 @@ public class TestSparkComputationGraph extends BaseSparkTest { public static ComputationGraph getBasicNetIris2Class() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") .addLayer("l0", new DenseLayer.Builder().nIn(4).nOut(10).build(), "in") .addLayer("l1", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -104,7 +104,7 @@ public class TestSparkComputationGraph extends BaseSparkTest { while (iter.hasNext()) list.add(iter.next()); - ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .updater(new Sgd(0.1)) .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", @@ -138,7 +138,7 @@ public class TestSparkComputationGraph extends BaseSparkTest { @Test public void testDistributedScoring() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.1) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.1) .seed(123).updater(new Nesterovs(0.1, 0.9)).graphBuilder() 
.addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) @@ -217,7 +217,7 @@ public class TestSparkComputationGraph extends BaseSparkTest { //@Ignore("AB 2019/05/23 - Failing on CI only - passing locally. Possible precision or threading issue") public void testSeedRepeatability() throws Exception { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.RMSPROP) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(Updater.RMSPROP) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(4) @@ -414,7 +414,7 @@ public class TestSparkComputationGraph extends BaseSparkTest { JavaRDD rdd = sc.parallelize(l); // simple model - val modelConf = new NeuralNetConfiguration.Builder() + val modelConf = NeuralNetConfiguration.builder() .updater(new Adam(0.01)) .weightInit(WeightInit.XAVIER_UNIFORM) .biasInit(0) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java index 887696af3..f0d15745d 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java @@ -53,7 +53,7 @@ public class TestFrozenLayers extends BaseSparkTest { @Test public void testSparkFrozenLayers() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.TANH); FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); @@ -136,7 +136,7 @@ public class TestFrozenLayers extends BaseSparkTest { int nIn = 6; int nOut = 3; - ComputationGraph origModel = new ComputationGraph(new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + ComputationGraph origModel = new ComputationGraph(NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.TANH).graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(6).nOut(5).build(), "in") .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(4).build(), "0") diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java index 550ccc9b2..adc3d5508 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java @@ -23,7 +23,7 @@ package org.deeplearning4j.spark.impl.multilayer; import org.apache.spark.api.java.JavaPairRDD; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import 
org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.variational.GaussianReconstructionDistribution; @@ -57,7 +57,7 @@ public class TestMiscFunctions extends BaseSparkTest { @Test public void testFeedForwardWithKey() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3) .activation(Activation.SOFTMAX).build()) @@ -107,7 +107,7 @@ public class TestMiscFunctions extends BaseSparkTest { @Test public void testFeedForwardWithKeyInputMask() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .list() .layer( new LSTM.Builder().nIn(4).nOut(3).build()) .layer(new GlobalPoolingLayer(PoolingType.AVG)) @@ -162,7 +162,7 @@ public class TestMiscFunctions extends BaseSparkTest { @Test public void testFeedForwardWithKeyGraph() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in1", "in2") .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in1") .addLayer("1", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in2").addLayer("2", @@ -220,7 +220,7 @@ public class TestMiscFunctions extends BaseSparkTest { int nIn = 10; - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list() .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() .reconstructionDistribution( new GaussianReconstructionDistribution(Activation.IDENTITY)) @@ -259,7 +259,7 @@ public class TestMiscFunctions extends BaseSparkTest { int nIn = 10; - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() .list().layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() .reconstructionDistribution(new LossFunctionWrapper( diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java index c64618557..e66e8bb9d 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java @@ -25,7 +25,7 @@ import lombok.extern.slf4j.Slf4j; import org.apache.spark.api.java.JavaRDD; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; 
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -102,7 +102,7 @@ public class TestSparkDl4jMultiLayer extends BaseSparkTest { //---------------------------------- //Create network configuration and conduct network training - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java index cbe7247bd..e5faa2884 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java @@ -26,7 +26,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -63,9 +63,9 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { } - private static MultiLayerConfiguration getConf(int seed, IUpdater updater) { + private static NeuralNetConfiguration getConf(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder() @@ -74,9 +74,9 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { return conf; } - private static MultiLayerConfiguration getConfCNN(int seed, IUpdater updater) { + private static NeuralNetConfiguration getConfCNN(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).list() .layer(0, new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0) @@ -85,13 +85,13 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { .activation(Activation.TANH).build()) .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10) .build()) - .setInputType(InputType.convolutional(10, 10, 3)).build(); + .inputType(InputType.convolutional(10, 10, 3)).build(); return conf; } 
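getConfCNN() above shows both builder changes in one place: NeuralNetConfiguration.builder() replaces new NeuralNetConfiguration.Builder(), and inputType(...) replaces setInputType(...), with build() now returning a NeuralNetConfiguration that feeds MultiLayerNetwork directly. A minimal standalone sketch of that pattern, with illustrative layer sizes rather than values taken from the patch:

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.inputs.InputType;
    import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    class SmallCnnSketch {
        // Build a tiny CNN the same way the migrated tests do: builder(), then inputType().
        static MultiLayerNetwork build(int seed) {
            NeuralNetConfiguration conf = NeuralNetConfiguration.builder()  // was new NeuralNetConfiguration.Builder()
                    .seed(seed)
                    .list()
                    .layer(0, new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1)
                            .activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
                            .nOut(10).build())
                    .inputType(InputType.convolutional(10, 10, 3))          // was setInputType(...)
                    .build();
            MultiLayerNetwork model = new MultiLayerNetwork(conf);
            model.init();
            return model;
        }
    }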
private static ComputationGraphConfiguration getGraphConf(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).graphBuilder() .addInputs("in") @@ -105,7 +105,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { private static ComputationGraphConfiguration getGraphConfCNN(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).graphBuilder() .addInputs("in") diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java index bc1ced484..8907c2165 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java @@ -37,7 +37,7 @@ import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.BatchNormalization; @@ -127,7 +127,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { .toJavaRDD().map(new TestFn()); DataSet d = new IrisDataSetIterator(150, 150).next(); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) @@ -162,8 +162,8 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { .getAbsolutePath()) .toJavaRDD().map(new TestFn()); - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(123) .updater(new Adam(1e-6)) .weightInit(WeightInit.XAVIER) .list() @@ -275,7 +275,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { //Spark tests don't run on windows return; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new 
org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) @@ -300,7 +300,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { @Test public void testDistributedScoring() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.1) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.1) .seed(123).updater(new Nesterovs(0.1, 0.9)).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) @@ -389,7 +389,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { list.add(iter.next()); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) @@ -453,7 +453,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) @@ -523,7 +523,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) @@ -611,7 +611,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) @@ -684,7 +684,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { //Spark tests don't run on windows return; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(4) @@ -769,7 +769,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { list.add(iter.next()); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) 
.activation(Activation.TANH).build()) @@ -791,13 +791,13 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { JavaRDD rdd = sc.parallelize(list); - assertEquals(0, sparkNet.getNetwork().getLayerWiseConfigurations().getIterationCount()); + assertEquals(0, sparkNet.getNetwork().getConfiguration().getIterationCount()); sparkNet.fit(rdd); assertEquals(minibatchesPerWorkerPerEpoch, - sparkNet.getNetwork().getLayerWiseConfigurations().getIterationCount()); + sparkNet.getNetwork().getConfiguration().getIterationCount()); sparkNet.fit(rdd); assertEquals(2 * minibatchesPerWorkerPerEpoch, - sparkNet.getNetwork().getLayerWiseConfigurations().getIterationCount()); + sparkNet.getNetwork().getConfiguration().getIterationCount()); sparkNet.getTrainingMaster().deleteTempFiles(sc); } @@ -819,7 +819,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { list.add(iter.next()); } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) @@ -860,7 +860,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { int nIn = 8; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .weightInit(WeightInit.XAVIER).list() .layer(0, new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12) .decoderLayerSizes(13).reconstructionDistribution( @@ -896,7 +896,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { int nIn = 8; Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp()) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12) .decoderLayerSizes(13).reconstructionDistribution( @@ -936,8 +936,8 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { int nOut = 2; int layerSize = 10; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).build()) .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX).lossFunction( @@ -991,8 +991,8 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { int nOut = 3; int layerSize = 10; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).build()) .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX).lossFunction( @@ -1045,12 +1045,12 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { //Spark tests don't 
run on windows return; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new OutputLayer.Builder().nIn(4).nOut(3).build()) .build(); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).build(), "in") @@ -1081,11 +1081,11 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { for(int i=0; i<3; i++ ){ - assertEquals(i, sn1.getNetwork().getLayerWiseConfigurations().getEpochCount()); + assertEquals(i, sn1.getNetwork().getConfiguration().getEpochCount()); assertEquals(i, sn2.getNetwork().getConfiguration().getEpochCount()); sn1.fit(rdd); sn2.fit(rdd); - assertEquals(i+1, sn1.getNetwork().getLayerWiseConfigurations().getEpochCount()); + assertEquals(i+1, sn1.getNetwork().getConfiguration().getEpochCount()); assertEquals(i+1, sn2.getNetwork().getConfiguration().getEpochCount()); } } diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java index f4939e369..fc446048f 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java @@ -26,7 +26,7 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -67,7 +67,7 @@ public class TestTrainingStatsCollection extends BaseSparkTest { try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).build()) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java index 6f79d7595..6d8a9e9bd 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java @@ -27,7 +27,7 @@ import org.deeplearning4j.core.storage.Persistable; import org.deeplearning4j.core.storage.StatsStorage; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import 
org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -60,7 +60,7 @@ public class TestListeners extends BaseSparkTest { JavaSparkContext sc = getContext(); int nExecutors = numExecutors(); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/MultiLayerNetworkHandler.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/MultiLayerNetworkHandler.java index 58389d74e..37bd5c2a9 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/MultiLayerNetworkHandler.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/MultiLayerNetworkHandler.java @@ -20,7 +20,7 @@ package org.deeplearning4j.rl4j.network; import lombok.Getter; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -41,7 +41,7 @@ public class MultiLayerNetworkHandler implements INetworkHandler { @Getter private final boolean recurrent; - private final MultiLayerConfiguration configuration; + private final NeuralNetConfiguration configuration; private final String labelName; private final String gradientName; private final int inputFeatureIdx; @@ -59,7 +59,7 @@ public class MultiLayerNetworkHandler implements INetworkHandler { int inputFeatureIdx) { this.model = model; recurrent = model.getOutputLayer() instanceof RnnOutputLayer; - configuration = model.getLayerWiseConfigurations(); + configuration = model.getConfiguration(); this.labelName = labelName; this.gradientName = gradientName; this.inputFeatureIdx = inputFeatureIdx; diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdConv.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdConv.java index cda26645f..ed8ceacda 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdConv.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdConv.java @@ -59,7 +59,7 @@ public class ActorCriticFactoryCompGraphStdConv implements ActorCriticFactoryCom int w = (((shapeInputs[2] - 8) / 4 + 1) - 4) / 2 + 1; ComputationGraphConfiguration.GraphBuilder confB = - new NeuralNetConfiguration.Builder().seed(Constants.NEURAL_NET_SEED) + NeuralNetConfiguration.builder().seed(Constants.NEURAL_NET_SEED) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(conf.getUpdater() != null ? 
conf.getUpdater() : new Adam()) .weightInit(WeightInit.XAVIER) diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdDense.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdDense.java index 65e409b83..f05d43f3b 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdDense.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdDense.java @@ -49,7 +49,7 @@ public class ActorCriticFactoryCompGraphStdDense implements ActorCriticFactoryCo nIn *= i; } ComputationGraphConfiguration.GraphBuilder confB = - new NeuralNetConfiguration.Builder().seed(Constants.NEURAL_NET_SEED) + NeuralNetConfiguration.builder().seed(Constants.NEURAL_NET_SEED) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(conf.getUpdater() != null ? conf.getUpdater() : new Adam()) .weightInit(WeightInit.XAVIER) diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactorySeparateStdDense.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactorySeparateStdDense.java index 8f8b739d8..80cb6384b 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactorySeparateStdDense.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactorySeparateStdDense.java @@ -24,7 +24,7 @@ import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Value; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -56,7 +56,7 @@ public class ActorCriticFactorySeparateStdDense implements ActorCriticFactorySep for (int i : numInputs) { nIn *= i; } - NeuralNetConfiguration.ListBuilder confB = new NeuralNetConfiguration.Builder().seed(Constants.NEURAL_NET_SEED) + NeuralNetConfiguration.ListBuilder confB = NeuralNetConfiguration.builder().seed(Constants.NEURAL_NET_SEED) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(conf.getUpdater() != null ? conf.getUpdater() : new Adam()) .weightInit(WeightInit.XAVIER) @@ -81,7 +81,7 @@ public class ActorCriticFactorySeparateStdDense implements ActorCriticFactorySep } confB.setInputType(conf.isUseLSTM() ? InputType.recurrent(nIn) : InputType.feedForward(nIn)); - MultiLayerConfiguration mlnconf2 = confB.build(); + NeuralNetConfiguration mlnconf2 = confB.build(); MultiLayerNetwork model = new MultiLayerNetwork(mlnconf2); model.init(); if (conf.getListeners() != null) { @@ -90,7 +90,7 @@ public class ActorCriticFactorySeparateStdDense implements ActorCriticFactorySep model.setListeners(new ScoreIterationListener(Constants.NEURAL_NET_ITERATION_LISTENER)); } - NeuralNetConfiguration.ListBuilder confB2 = new NeuralNetConfiguration.Builder().seed(Constants.NEURAL_NET_SEED) + NeuralNetConfiguration.ListBuilder confB2 = NeuralNetConfiguration.builder().seed(Constants.NEURAL_NET_SEED) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(conf.getUpdater() != null ? 
conf.getUpdater() : new Adam()) .weightInit(WeightInit.XAVIER) @@ -116,7 +116,7 @@ public class ActorCriticFactorySeparateStdDense implements ActorCriticFactorySep } confB2.setInputType(conf.isUseLSTM() ? InputType.recurrent(nIn) : InputType.feedForward(nIn)); - MultiLayerConfiguration mlnconf = confB2.build(); + NeuralNetConfiguration mlnconf = confB2.build(); MultiLayerNetwork model2 = new MultiLayerNetwork(mlnconf); model2.init(); if (conf.getListeners() != null) { diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticSeparate.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticSeparate.java index 9daeb1af8..8ae8f1944 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticSeparate.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticSeparate.java @@ -23,7 +23,7 @@ package org.deeplearning4j.rl4j.network.ac; import lombok.Getter; import org.apache.commons.lang3.NotImplementedException; import org.deeplearning4j.nn.api.NeuralNetwork; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -131,7 +131,7 @@ public class ActorCriticSeparate implements IAct @Override public void applyGradients(Gradients gradients) { int batchSize = (int)gradients.getBatchSize(); - MultiLayerConfiguration valueConf = valueNet.getLayerWiseConfigurations(); + NeuralNetConfiguration valueConf = valueNet.getConfiguration(); int valueIterationCount = valueConf.getIterationCount(); int valueEpochCount = valueConf.getEpochCount(); Gradient valueGradient = gradients.getGradient(CommonGradientNames.ActorCritic.Value); @@ -145,7 +145,7 @@ public class ActorCriticSeparate implements IAct } valueConf.setIterationCount(valueIterationCount + 1); - MultiLayerConfiguration policyConf = policyNet.getLayerWiseConfigurations(); + NeuralNetConfiguration policyConf = policyNet.getConfiguration(); int policyIterationCount = policyConf.getIterationCount(); int policyEpochCount = policyConf.getEpochCount(); Gradient policyGradient = gradients.getGradient(CommonGradientNames.ActorCritic.Policy); @@ -191,7 +191,7 @@ public class ActorCriticSeparate implements IAct @Deprecated public void applyGradient(Gradient[] gradient, int batchSize) { - MultiLayerConfiguration valueConf = valueNet.getLayerWiseConfigurations(); + NeuralNetConfiguration valueConf = valueNet.getConfiguration(); int valueIterationCount = valueConf.getIterationCount(); int valueEpochCount = valueConf.getEpochCount(); valueNet.getUpdater().update(valueNet, gradient[0], valueIterationCount, valueEpochCount, batchSize, LayerWorkspaceMgr.noWorkspaces()); @@ -204,7 +204,7 @@ public class ActorCriticSeparate implements IAct } valueConf.setIterationCount(valueIterationCount + 1); - MultiLayerConfiguration policyConf = policyNet.getLayerWiseConfigurations(); + NeuralNetConfiguration policyConf = policyNet.getConfiguration(); int policyIterationCount = policyConf.getIterationCount(); int policyEpochCount = policyConf.getEpochCount(); policyNet.getUpdater().update(policyNet, gradient[1], policyIterationCount, policyEpochCount, batchSize, LayerWorkspaceMgr.noWorkspaces()); diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQN.java 
b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQN.java index 8338884a2..c292432b2 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQN.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQN.java @@ -22,7 +22,7 @@ package org.deeplearning4j.rl4j.network.dqn; import org.apache.commons.lang3.NotImplementedException; import org.deeplearning4j.nn.api.NeuralNetwork; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; @@ -157,7 +157,7 @@ public class DQN implements IDQN { public void applyGradients(Gradients gradients) { Gradient qValues = gradients.getGradient(CommonGradientNames.QValues); - MultiLayerConfiguration mlnConf = mln.getLayerWiseConfigurations(); + NeuralNetConfiguration mlnConf = mln.getConfiguration(); int iterationCount = mlnConf.getIterationCount(); int epochCount = mlnConf.getEpochCount(); mln.getUpdater().update(mln, qValues, iterationCount, epochCount, (int)gradients.getBatchSize(), LayerWorkspaceMgr.noWorkspaces()); @@ -172,7 +172,7 @@ public class DQN implements IDQN { } public void applyGradient(Gradient[] gradient, int batchSize) { - MultiLayerConfiguration mlnConf = mln.getLayerWiseConfigurations(); + NeuralNetConfiguration mlnConf = mln.getConfiguration(); int iterationCount = mlnConf.getIterationCount(); int epochCount = mlnConf.getEpochCount(); mln.getUpdater().update(mln, gradient[0], iterationCount, epochCount, batchSize, LayerWorkspaceMgr.noWorkspaces()); diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdConv.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdConv.java index cf683aa35..bb64200bd 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdConv.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdConv.java @@ -24,7 +24,7 @@ import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Value; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -55,7 +55,7 @@ public class DQNFactoryStdConv implements DQNFactory { throw new AssertionError("Impossible to apply convolutional layer on a shape == 1"); - NeuralNetConfiguration.ListBuilder confB = new NeuralNetConfiguration.Builder().seed(Constants.NEURAL_NET_SEED) + NeuralNetConfiguration.ListBuilder confB = NeuralNetConfiguration.builder().seed(Constants.NEURAL_NET_SEED) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .l2(conf.getL2()) .updater(conf.getUpdater() != null ? 
conf.getUpdater() : new Adam()) @@ -71,8 +71,8 @@ public class DQNFactoryStdConv implements DQNFactory { confB.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nOut(numOutputs) .build()); - confB.setInputType(InputType.convolutional(shapeInputs[1], shapeInputs[2], shapeInputs[0])); - MultiLayerConfiguration mlnconf = confB.build(); + confB.inputType(InputType.convolutional(shapeInputs[1], shapeInputs[2], shapeInputs[0])); + NeuralNetConfiguration mlnconf = confB.build(); MultiLayerNetwork model = new MultiLayerNetwork(mlnconf); model.init(); if (conf.getListeners() != null) { diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdDense.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdDense.java index d35a5f064..15b33170a 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdDense.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdDense.java @@ -24,7 +24,7 @@ import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Value; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -54,7 +54,7 @@ public class DQNFactoryStdDense implements DQNFactory { nIn *= i; } - NeuralNetConfiguration.ListBuilder confB = new NeuralNetConfiguration.Builder().seed(Constants.NEURAL_NET_SEED) + NeuralNetConfiguration.ListBuilder confB = NeuralNetConfiguration.builder().seed(Constants.NEURAL_NET_SEED) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(conf.getUpdater() != null ? 
conf.getUpdater() : new Adam()) .weightInit(WeightInit.XAVIER) @@ -82,7 +82,7 @@ public class DQNFactoryStdDense implements DQNFactory { ); - MultiLayerConfiguration mlnconf = confB.build(); + NeuralNetConfiguration mlnconf = confB.build(); MultiLayerNetwork model = new MultiLayerNetwork(mlnconf); model.init(); if (conf.getListeners() != null) { diff --git a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/NStepRnn.java b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/NStepRnn.java index 5cd403cee..dc23edd6e 100644 --- a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/NStepRnn.java +++ b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/NStepRnn.java @@ -134,7 +134,7 @@ public class NStepRnn { } private static ComputationGraphConfiguration.GraphBuilder buildBaseNetworkConfiguration(int lstmLayerSize, int dl1Size, int dl2Size) { - return new NeuralNetConfiguration.Builder().seed(Constants.NEURAL_NET_SEED) + return NeuralNetConfiguration.builder().seed(Constants.NEURAL_NET_SEED) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Adam()) .weightInit(WeightInit.XAVIER) diff --git a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/RobotLakeExample.java b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/RobotLakeExample.java index 4f95632a0..adbd6a3c5 100644 --- a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/RobotLakeExample.java +++ b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/RobotLakeExample.java @@ -188,7 +188,7 @@ public class RobotLakeExample { } private static ComputationGraphConfiguration.GraphBuilder buildBaseNetworkConfiguration() { - return new NeuralNetConfiguration.Builder().seed(Constants.NEURAL_NET_SEED) + return NeuralNetConfiguration.builder().seed(Constants.NEURAL_NET_SEED) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Adam()) .weightInit(WeightInit.XAVIER) diff --git a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/TMazeExample.java b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/TMazeExample.java index 617e436df..64c971e00 100644 --- a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/TMazeExample.java +++ b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/TMazeExample.java @@ -174,7 +174,7 @@ public class TMazeExample { } private static ComputationGraphConfiguration.GraphBuilder buildBaseNetworkConfiguration() { - return new NeuralNetConfiguration.Builder().seed(Constants.NEURAL_NET_SEED) + return NeuralNetConfiguration.builder().seed(Constants.NEURAL_NET_SEED) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Adam()) .weightInit(WeightInit.XAVIER) diff --git a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/network/MultiLayerNetworkHandlerTest.java b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/network/MultiLayerNetworkHandlerTest.java index 0f5b51407..69d305b31 100644 --- a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/network/MultiLayerNetworkHandlerTest.java +++ b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/network/MultiLayerNetworkHandlerTest.java @@ -21,7 +21,7 @@ package org.deeplearning4j.rl4j.network; import org.deeplearning4j.nn.api.Updater; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer; import 
org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -52,7 +52,7 @@ public class MultiLayerNetworkHandlerTest { private MultiLayerNetwork modelMock; private TrainingListener trainingListenerMock; - private MultiLayerConfiguration configurationMock; + private NeuralNetConfiguration configurationMock; private MultiLayerNetworkHandler sut; @@ -60,10 +60,10 @@ public class MultiLayerNetworkHandlerTest { modelMock = mock(MultiLayerNetwork.class); trainingListenerMock = mock(TrainingListener.class); - configurationMock = mock(MultiLayerConfiguration.class); + configurationMock = mock(NeuralNetConfiguration.class); when(configurationMock.getIterationCount()).thenReturn(123); when(configurationMock.getEpochCount()).thenReturn(234); - when(modelMock.getLayerWiseConfigurations()).thenReturn(configurationMock); + when(modelMock.getConfiguration()).thenReturn(configurationMock); if(setupRecurrent) { when(modelMock.getOutputLayer()).thenReturn(new RnnOutputLayer(null, null)); diff --git a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/policy/PolicyTest.java b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/policy/PolicyTest.java index f74713466..f0ff3f641 100644 --- a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/policy/PolicyTest.java +++ b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/policy/PolicyTest.java @@ -166,9 +166,9 @@ public class PolicyTest { @Test public void testACPolicy() throws Exception { - ComputationGraph cg = new ComputationGraph(new NeuralNetConfiguration.Builder().seed(444).graphBuilder().addInputs("input") + ComputationGraph cg = new ComputationGraph(NeuralNetConfiguration.builder().seed(444).graphBuilder().addInputs("input") .addLayer("output", new OutputLayer.Builder().nOut(1).lossFunction(LossFunctions.LossFunction.XENT).activation(Activation.SIGMOID).build(), "input").setOutputs("output").build()); - MultiLayerNetwork mln = new MultiLayerNetwork(new NeuralNetConfiguration.Builder().seed(555).list() + MultiLayerNetwork mln = new MultiLayerNetwork(NeuralNetConfiguration.builder().seed(555).list() .layer(0, new OutputLayer.Builder().nOut(1).lossFunction(LossFunctions.LossFunction.XENT).activation(Activation.SIGMOID).build()).build()); ACPolicy policy = new ACPolicy(new DummyAC(mln), true, Nd4j.getRandom()); diff --git a/README.md b/README.md index e3eb6ba84..d1e64a639 100644 --- a/README.md +++ b/README.md @@ -48,12 +48,12 @@ Deeplearning4J offers a very high level API for defining even complex neural net you how LeNet, a convolutional neural network, is defined in DL4J. 
```java -MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() +NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(seed) .l2(0.0005) .weightInit(WeightInit.XAVIER) .updater(new Adam(1e-3)) - .list() + .layer(new ConvolutionLayer.Builder(5, 5) .stride(1,1) .nOut(20) @@ -78,7 +78,7 @@ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .nOut(outputNum) .activation(Activation.SOFTMAX) .build()) - .setInputType(InputType.convolutionalFlat(28,28,1)) + .inputType(InputType.convolutionalFlat(28,28,1)) .build(); ``` diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java index fca68610a..c03d9f5c2 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java @@ -46,13 +46,12 @@ import org.datavec.image.transform.ResizeImageTransform; import org.datavec.image.transform.ShowImageTransform; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ActivationLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.DropoutLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -61,7 +60,6 @@ import org.deeplearning4j.nn.weights.WeightInitXavier; import org.deeplearning4j.optimize.listeners.ScoreToChartListener; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.activations.impl.ActivationLReLU; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; @@ -92,8 +90,8 @@ public class App { private static JPanel panel; private static JPanel panel2; - private static Layer[] genLayers() { - return new Layer[] { + private static LayerConfiguration[] genLayers() { + return new LayerConfiguration[] { new DenseLayer.Builder().nIn(INPUT).nOut(X_DIM*Y_DIM*CHANNELS).weightInit(WeightInit.NORMAL).build(), new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), new DenseLayer.Builder().nIn(X_DIM*Y_DIM*CHANNELS).nOut(X_DIM*Y_DIM).build(), @@ -103,33 +101,33 @@ public class App { new DenseLayer.Builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM*CHANNELS).activation(Activation.TANH) .build() }; - - - } + } /** * Returns a network config that takes in a 10x10 random number and produces a 28x28 grayscale image. 
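 * <p>A minimal usage sketch (illustrative only; it simply mirrors how {@code main} further below
 * constructs the generator network from this configuration):
 * <pre>{@code
 * // build and initialise the generator from the config returned here
 * MultiLayerNetwork gen = new MultiLayerNetwork(generator());
 * gen.init();
 * long nParams = gen.numParams(); // sketch: inspect the parameter count before training
 * }</pre>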
* * @return config */ - private static MultiLayerConfiguration generator() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + private static NeuralNetConfiguration generator() { + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(42) .updater(UPDATER) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold(GRADIENT_THRESHOLD) - .weightInit(WeightInit.XAVIER) + //.weightInit(WeightInit.XAVIER) + .weightInitFn(new WeightInitXavier()) .activation(Activation.IDENTITY) - .list(genLayers()) - .setInputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) + .layersFromArray(genLayers()) + .inputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) // .inputPreProcessor("CNN1", new FeedForwardToCnnPreProcessor(Y_DIM, X_DIM, CHANNELS)) .build(); + ((NeuralNetConfiguration) conf).init(); return conf; } - private static Layer[] disLayers() { - return new Layer[]{ + private static LayerConfiguration[] disLayers() { + return new LayerConfiguration[]{ new DenseLayer.Builder().nOut(X_DIM*Y_DIM*CHANNELS*2).build(), //input is set by setInputType on the network new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), new DropoutLayer.Builder(1 - 0.5).build(), @@ -146,45 +144,50 @@ public class App { }; } - private static MultiLayerConfiguration discriminator() { + private static NeuralNetConfiguration discriminator() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(42) .updater(UPDATER) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold(GRADIENT_THRESHOLD) .weightInit(WeightInit.XAVIER) + //.weightInitFn(new WeightInitXavier()) + //.activationFn(new ActivationIdentity()) .activation(Activation.IDENTITY) - .list(disLayers()) - .setInputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) + .layersFromArray(disLayers()) + .inputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) .build(); + ((NeuralNetConfiguration) conf).init(); return conf; } - private static MultiLayerConfiguration gan() { - Layer[] genLayers = genLayers(); - Layer[] disLayers = Arrays.stream(disLayers()) + private static NeuralNetConfiguration gan() { + LayerConfiguration[] genLayers = genLayers(); + LayerConfiguration[] disLayers = Arrays.stream(disLayers()) .map((layer) -> { - if (layer instanceof DenseLayer || layer instanceof OutputLayer) { - return new FrozenLayerWithBackprop(layer); + if (layer instanceof DenseLayer || layer instanceof OutputLayer) { + return new FrozenLayerWithBackprop(layer); } else { return layer; } - }).toArray(Layer[]::new); - Layer[] layers = ArrayUtils.addAll(genLayers, disLayers); + }).toArray(LayerConfiguration[]::new); + LayerConfiguration[] layers = ArrayUtils.addAll(genLayers, disLayers); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(42) .updater( Adam.builder().learningRate(0.0002).beta1(0.5).build() ) .gradientNormalization( GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold( 100 ) - .weightInit( new WeightInitXavier() ) - .activation( new ActivationIdentity()) - .list( layers ) - .setInputType( InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) + //.weightInitFn( new WeightInitXavier() ) //this is internal + .weightInit( WeightInit.XAVIER) + //.activationFn( new ActivationIdentity()) //this is internal + .activation( Activation.IDENTITY ) + 
.layersFromArray( layers ) + .inputType( InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) .build(); - +((NeuralNetConfiguration) conf).init(); return conf; } @@ -195,6 +198,8 @@ public class App { } public static void main(String... args) throws Exception { + + log.info("\u001B[32m Some \u001B[1m green \u001B[22m text \u001B[0m \u001B[7m Inverted\u001B[0m "); Nd4j.getMemoryManager().setAutoGcWindow(15 * 1000); // MnistDataSetIterator trainData = new MnistDataSetIterator(128, true, 45); @@ -220,9 +225,10 @@ public class App { MultiLayerNetwork gen = new MultiLayerNetwork(generator()); MultiLayerNetwork dis = new MultiLayerNetwork(discriminator()); MultiLayerNetwork gan = new MultiLayerNetwork(gan()); - gen.init(); - dis.init(); - gan.init(); + gen.init(); log.debug("Generator network: {}", gen); + dis.init(); log.debug("Discriminator network: {}", dis); + gan.init(); log.debug("Complete GAN network: {}", gan); + copyParams(gen, dis, gan); diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/GAN.java b/brutex-extended-tests/src/test/java/net/brutex/gan/GAN.java index 25473fc9e..659c6ab32 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/GAN.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/GAN.java @@ -25,6 +25,7 @@ import org.apache.commons.lang3.ArrayUtils; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.api.BaseTrainingListener; import org.nd4j.evaluation.classification.Evaluation; @@ -199,13 +200,13 @@ public class GAN { Layer[] disLayers = ganDiscriminator.getLayers(); Layer[] layers = ArrayUtils.addAll(genLayers, disLayers); - MultiLayerConfiguration genConf = generator.getLayerWiseConfigurations(); - MultiLayerConfiguration disConf = ganDiscriminator.getLayerWiseConfigurations(); - org.deeplearning4j.nn.conf.layers.Layer[] confLayers = new org.deeplearning4j.nn.conf.layers.Layer[layers.length]; + NeuralNetConfiguration genConf = generator.getConfiguration(); + NeuralNetConfiguration disConf = ganDiscriminator.getConfiguration(); + LayerConfiguration[] confLayers = new LayerConfiguration[layers.length]; Map preProcessors = new HashMap<>(); for (int i = 0; i < layers.length; i++) { - confLayers[i] = layers[i].conf().getLayer(); + confLayers[i] = layers[i].getLayerConfiguration(); if (i < numGenLayers) { preProcessors.put(i, genConf.getInputPreProcess(i)); } else { @@ -213,7 +214,7 @@ public class GAN { } } - MultiLayerConfiguration ganConf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration ganConf = NeuralNetConfiguration.builder() .seed(seed) .updater(updater) .biasUpdater(biasUpdater) @@ -224,7 +225,7 @@ public class GAN { .trainingWorkspaceMode(trainingWorkSpaceMode) .inferenceWorkspaceMode(inferenceWorkspaceMode) .cacheMode(cacheMode) - .list(confLayers) + .layersFromArray(confLayers) .inputPreProcessors(preProcessors) .build(); gan = new MultiLayerNetwork(ganConf); @@ -267,7 +268,7 @@ public class GAN { } /** - * GAN builder, used as a starting point for creating a MultiLayerConfiguration or + * GAN builder, used as a starting point for creating a NeuralNetConfiguration or * ComputationGraphConfiguration.
*/ public static class Builder implements Cloneable { diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java index d0e5bb73d..07e6a148a 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java @@ -100,7 +100,7 @@ public class MnistDCGANExample { public static void main(String[] args) throws Exception { Supplier genSupplier = () -> { - return new MultiLayerNetwork(new NeuralNetConfiguration.Builder().list() + return new MultiLayerNetwork(NeuralNetConfiguration.builder() .layer(0, new DenseLayer.Builder().nIn(latentDim).nOut(width / 2 * height / 2 * 128) .activation(Activation.LEAKYRELU).weightInit(WeightInit.NORMAL).build()) .layer(1, new Convolution2D.Builder().nIn(128).nOut(128).kernelSize(5, 5) @@ -119,16 +119,16 @@ public class MnistDCGANExample { .inputPreProcessor(1, new FeedForwardToCnnPreProcessor(height / 2, width / 2, 128)) .inputPreProcessor(6, new CnnToFeedForwardPreProcessor(height, width, channels)) - .setInputType(InputType.feedForward(latentDim)) + .inputType(InputType.feedForward(latentDim)) .build()); }; GAN.DiscriminatorProvider discriminatorProvider = (updater) -> { - return new MultiLayerNetwork(new NeuralNetConfiguration.Builder() + return new MultiLayerNetwork(NeuralNetConfiguration.builder() .updater(new RmsProp.Builder().learningRate(0.0008).rmsDecay(1e-8).build()) //.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) //.gradientNormalizationThreshold(100.0) - .list() + .layer(0, new Convolution2D.Builder().nIn(channels).nOut(64).kernelSize(3, 3) .activation(Activation.LEAKYRELU).build()) .layer(1, new Convolution2D.Builder().nIn(64).nOut(64).kernelSize(3, 3).stride(2, 2) @@ -142,7 +142,7 @@ public class MnistDCGANExample { .layer(6, new LossLayer.Builder().lossFunction(LossFunctions.LossFunction.XENT).build()) .inputPreProcessor(0, new FeedForwardToCnnPreProcessor(height, width, channels)) .inputPreProcessor(4, new CnnToFeedForwardPreProcessor(2, 2, 64)) - .setInputType(InputType.convolutionalFlat(height, width, channels)) + .inputType(InputType.convolutionalFlat(height, width, channels)) .build()); }; diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java index 037a0be9d..be3014f3c 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java @@ -23,7 +23,6 @@ package net.brutex.gan; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.ActivationLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -57,12 +56,12 @@ public class MnistSimpleGAN { public static MultiLayerNetwork getGenerator() { - MultiLayerConfiguration genConf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration genConf = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold(100) - .list() + .layer(new 
DenseLayer.Builder().nIn(100).nOut(256).weightInit(WeightInit.NORMAL).build()) .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) .layer(new DenseLayer.Builder().nIn(256).nOut(512).build()) @@ -76,14 +75,14 @@ public class MnistSimpleGAN { public static MultiLayerNetwork getDiscriminator(IUpdater updater) { - MultiLayerConfiguration discConf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration discConf = NeuralNetConfiguration.builder() .seed(42) .updater(updater) .weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold(100) - .list() + .layer(new DenseLayer.Builder().nIn(784).nOut(1024).updater(updater).build()) .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) .layer(new DropoutLayer.Builder(1 - 0.5).build()) diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java index bc0aafa13..75965d7b5 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java @@ -35,7 +35,6 @@ import org.apache.spark.sql.RowFactory; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StringType; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; import org.datavec.api.records.reader.impl.csv.CSVRecordReader; @@ -43,12 +42,10 @@ import org.datavec.api.transform.TransformProcess; import org.datavec.api.transform.filter.FilterInvalidValues; import org.datavec.api.transform.schema.Schema; import org.datavec.api.Writable; -import org.datavec.spark.transform.Normalization; import org.datavec.spark.transform.SparkTransformExecutor; import org.datavec.spark.transform.misc.StringToWritablesFunction; import org.deeplearning4j.datasets.iterator.impl.EmnistDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.EmnistDataSetIterator.Set; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -287,10 +284,10 @@ public class BrianTest extends BaseSparkSessionTest { //Define Network - MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration multiLayerConfiguration = NeuralNetConfiguration.builder() .seed(123) .updater(new Nesterovs(0.1, 0.9)) - .list() + .layer(0, new DenseLayer.Builder().nIn(5).nOut(20).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).l2(0.001).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(20).weightInit(WeightInit.XAVIER) diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java index f32c3c4de..9195933ff 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java @@ -37,7 +37,6 @@ import org.datavec.api.transform.schema.Schema; import org.datavec.api.Writable; import org.datavec.spark.transform.SparkTransformExecutor; import org.datavec.spark.transform.misc.StringToWritablesFunction; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import 
org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -295,10 +294,10 @@ public class BrianTest2 /*extends BaseDL4JTest*/ { */ //Define Network - MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration multiLayerConfiguration = NeuralNetConfiguration.builder() .seed(123) .updater(new Nesterovs(0.1, 0.9)) - .list() + .layer(0, new DenseLayer.Builder().nIn(5).nOut(20).weightInit(WeightInit.XAVIER).activation(Activation.RELU).l2(0.001).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(20).weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) //.layer(2, new DenseLayerConfiguration.Builder().nIn(9).nOut(9).weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java index b81f70fc8..0cf2e5676 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java @@ -21,7 +21,6 @@ package net.brutex.spark; -import lombok.extern.log4j.Log4j2; //import net.brutex.ai.performance.storage.PostgresStatsStorage; import lombok.extern.slf4j.Slf4j; import org.datavec.api.records.reader.RecordReader; @@ -29,22 +28,17 @@ import org.datavec.api.records.reader.impl.collection.ListStringRecordReader; import org.datavec.api.records.reader.impl.csv.CSVRecordReader; import org.datavec.api.split.FileSplit; import org.datavec.api.split.ListStringSplit; -import org.deeplearning4j.core.storage.StatsStorage; -import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; + import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.DenseLayer; + import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.LSTM; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.optimize.listeners.ScoreIterationListener; -import org.deeplearning4j.ui.api.UIServer; -import org.deeplearning4j.ui.model.stats.StatsListener; -import org.deeplearning4j.ui.model.storage.FileStatsStorage; -import org.junit.jupiter.api.AfterAll; + import org.deeplearning4j.ui.api.UIServer; + import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.nd4j.evaluation.classification.Evaluation; @@ -86,13 +80,13 @@ public class TestServer { int i = 2000; int numClasses = 10; int numBatchSize = 100; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(1234) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs.Builder().learningRate(0.15).build()) .activation(Activation.RELU) .l2(0) - .list() + //.layer(0, new ConvolutionLayer.Builder().nIn(1).kernelSize(1, 5).stride(1,1).padding(0,2).nOut(1).name("1st Filter").updater(new Adam.Builder().learningRate(0.2).build()).build()) //.layer(1, new 
ConvolutionLayer.Builder().nIn(1).kernelSize(1, 2).stride(1,2).padding(0,0).nOut(1).name("2nd Filter").updater(new Adam.Builder().learningRate(0.1).build()).build()) // .layer(1, new DenseLayerConfiguration.Builder().nIn(10).nOut(64).activation(Activation.RELU).build()) diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java index ac625f2b6..c2d6f739c 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java @@ -21,7 +21,6 @@ package net.brutex.spark; -import lombok.extern.log4j.Log4j2; //import net.brutex.ai.performance.storage.PostgresStatsStorage; import lombok.extern.slf4j.Slf4j; import org.datavec.api.records.reader.RecordReader; @@ -32,9 +31,8 @@ import org.datavec.api.split.ListStringSplit; import org.datavec.image.recordreader.ImageRecordReader; import org.deeplearning4j.core.storage.StatsStorage; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.DenseLayer; + import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.LSTM; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; @@ -122,13 +120,13 @@ public class TestServer2 { int i = 2000; int numClasses = 10; int numBatchSize = 100; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(1234) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs.Builder().learningRate(0.15).build()) .activation(Activation.RELU) .l2(0) - .list() + //.layer(0, new ConvolutionLayer.Builder().nIn(1).kernelSize(1, 5).stride(1,1).padding(0,2).nOut(1).name("1st Filter").updater(new Adam.Builder().learningRate(0.2).build()).build()) //.layer(1, new ConvolutionLayer.Builder().nIn(1).kernelSize(1, 2).stride(1,2).padding(0,0).nOut(1).name("2nd Filter").updater(new Adam.Builder().learningRate(0.1).build()).build()) // .layer(1, new DenseLayerConfiguration.Builder().nIn(10).nOut(64).activation(Activation.RELU).build()) diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java index 8111d2b7d..0842ebfd4 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestBaselineGenerator.java @@ -21,14 +21,14 @@ package org.deeplearning4j.integration; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.io.FileUtils; import org.deeplearning4j.datasets.iterator.MultiDataSetWrapperIterator; import org.deeplearning4j.integration.testcases.samediff.SameDiffCNNCases; import org.deeplearning4j.integration.testcases.samediff.SameDiffMLPTestCases; import org.deeplearning4j.integration.testcases.samediff.SameDiffRNNTestCases; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import 
org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.listeners.CollectScoresListener; @@ -135,12 +135,12 @@ public class IntegrationTestBaselineGenerator { MultiLayerNetwork mln = null; ComputationGraph cg = null; SameDiff sd = null; - Model m = null; + IModel m = null; if (tc.getTestType() == TestCase.TestType.RANDOM_INIT) { Object config = tc.getConfiguration(); String json = null; - if (config instanceof MultiLayerConfiguration) { - MultiLayerConfiguration mlc = (MultiLayerConfiguration) config; + if (config instanceof NeuralNetConfiguration) { + NeuralNetConfiguration mlc = (NeuralNetConfiguration) config; json = mlc.toJson(); mln = new MultiLayerNetwork(mlc); mln.init(); diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java index 489c8021d..870f4022a 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java @@ -25,18 +25,18 @@ import com.google.common.collect.ImmutableSet; import com.google.common.reflect.ClassPath; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.io.FileUtils; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.common.config.DL4JClassLoading; import org.deeplearning4j.datasets.iterator.MultiDataSetWrapperIterator; import org.deeplearning4j.integration.util.CountingMultiDataSetIterator; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.BackpropType; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.LayerVertex; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.GraphVertex; import org.deeplearning4j.nn.layers.BaseOutputLayer; @@ -177,22 +177,22 @@ public class IntegrationTestRunner { MultiLayerNetwork mln = null; ComputationGraph cg = null; SameDiff sd = null; - Model m = null; + IModel m = null; if (tc.getTestType() == TestCase.TestType.RANDOM_INIT) { log.info("Checking RANDOM_INIT test case: saved model vs. 
initialized model"); //Checking randomly initialized model: File savedModel = new File(testBaseDir, IntegrationTestRunner.RANDOM_INIT_UNTRAINED_MODEL_FILENAME); Object config = tc.getConfiguration(); - if (config instanceof MultiLayerConfiguration) { - MultiLayerConfiguration mlc = (MultiLayerConfiguration) config; + if (config instanceof NeuralNetConfiguration) { + NeuralNetConfiguration mlc = (NeuralNetConfiguration) config; mln = new MultiLayerNetwork(mlc); mln.init(); m = mln; MultiLayerNetwork loaded = MultiLayerNetwork.load(savedModel, true); - assertEquals(loaded.getLayerWiseConfigurations(), mln.getLayerWiseConfigurations(), "Configs not equal"); + assertEquals(loaded.getConfiguration(), mln.getConfiguration(), "Configs not equal"); assertEquals( loaded.params(), mln.params(), "Params not equal"); - assertEquals( loaded.paramTable(), mln.paramTable(), "Param table not equal"); + assertEquals( loaded.getParamTable(), mln.getParamTable(), "Param table not equal"); } else if(config instanceof ComputationGraphConfiguration ){ ComputationGraphConfiguration cgc = (ComputationGraphConfiguration) config; cg = new ComputationGraph(cgc); @@ -426,8 +426,8 @@ public class IntegrationTestRunner { boolean isTbptt; int tbpttLength; if(modelType == ModelType.MLN){ - isTbptt = mln.getLayerWiseConfigurations().getBackpropType() == BackpropType.TruncatedBPTT; - tbpttLength = mln.getLayerWiseConfigurations().getTbpttFwdLength(); + isTbptt = mln.getConfiguration().getBackpropType() == BackpropType.TruncatedBPTT; + tbpttLength = mln.getConfiguration().getTbpttFwdLength(); } else if(modelType == ModelType.CG) { isTbptt = cg.getComputationGraphConfiguration().getBackpropType() == BackpropType.TruncatedBPTT; tbpttLength = cg.getComputationGraphConfiguration().getTbpttFwdLength(); @@ -606,7 +606,7 @@ public class IntegrationTestRunner { if (modelType == ModelType.MLN) { ModelSerializer.writeModel(m, f, true); MultiLayerNetwork restored = MultiLayerNetwork.load(f, true); - assertEquals(mln.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); + assertEquals(mln.getConfiguration(), restored.getConfiguration()); assertEquals(mln.params(), restored.params()); } else if(modelType == ModelType.CG){ ModelSerializer.writeModel(m, f, true); @@ -722,7 +722,7 @@ public class IntegrationTestRunner { } //Work out which layers, vertices etc we have seen - so we can (at the end of all tests) log our integration test coverage - private static void collectCoverageInformation(Model m){ + private static void collectCoverageInformation(IModel m){ boolean isMLN = (m instanceof MultiLayerNetwork); MultiLayerNetwork mln = (isMLN ? (MultiLayerNetwork)m : null); ComputationGraph cg = (!isMLN ? 
(ComputationGraph)m : null); @@ -735,14 +735,14 @@ public class IntegrationTestRunner { layers = cg.getLayers(); } for (org.deeplearning4j.nn.api.Layer l : layers) { - Layer lConf = l.conf().getLayer(); + LayerConfiguration lConf = l.getLayerConfiguration(); layerConfClassesSeen.put(lConf.getClass(), layerConfClassesSeen.getOrDefault(lConf.getClass(), 0) + 1); } //Collect preprocessor coverage information: Collection preProcessors; if (isMLN) { - preProcessors = mln.getLayerWiseConfigurations().getInputPreProcessors().values(); + preProcessors = mln.getConfiguration().getInputPreProcessors().values(); } else { preProcessors = new ArrayList<>(); for (org.deeplearning4j.nn.conf.graph.GraphVertex gv : cg.getComputationGraphConfiguration().getVertices().values()) { @@ -767,7 +767,7 @@ public class IntegrationTestRunner { } - private static void checkLayerClearance(Model m) { + private static void checkLayerClearance(IModel m) { //Check that the input fields for all layers have been cleared org.deeplearning4j.nn.api.Layer[] layers; if (m instanceof MultiLayerNetwork) { @@ -801,7 +801,7 @@ public class IntegrationTestRunner { } } - private static void validateLayerIterCounts(Model m, int expEpoch, int expIter){ + private static void validateLayerIterCounts(IModel m, int expEpoch, int expIter){ //Check that the iteration and epoch counts - on the layers - are synced org.deeplearning4j.nn.api.Layer[] layers; if (m instanceof MultiLayerNetwork) { @@ -817,7 +817,7 @@ public class IntegrationTestRunner { } - private static Map getFrozenLayerParamCopies(Model m){ + private static Map getFrozenLayerParamCopies(IModel m){ Map out = new LinkedHashMap<>(); org.deeplearning4j.nn.api.Layer[] layers; if (m instanceof MultiLayerNetwork) { @@ -832,7 +832,7 @@ public class IntegrationTestRunner { if(m instanceof MultiLayerNetwork){ paramPrefix = l.getIndex() + "_"; } else { - paramPrefix = l.conf().getLayer().getLayerName() + "_"; + paramPrefix = l.getLayerConfiguration().getLayerName() + "_"; } Map paramTable = l.paramTable(); for(Map.Entry e : paramTable.entrySet()){ @@ -854,7 +854,7 @@ public class IntegrationTestRunner { return out; } - public static void checkFrozenParams(Map copiesBeforeTraining, Model m){ + public static void checkFrozenParams(Map copiesBeforeTraining, IModel m){ for(Map.Entry e : copiesBeforeTraining.entrySet()){ INDArray actual = m.getParam(e.getKey()); assertEquals(e.getValue(), actual, e.getKey()); @@ -939,7 +939,7 @@ public class IntegrationTestRunner { } private static boolean isLayerConfig(Class c) { - return Layer.class.isAssignableFrom(c); + return LayerConfiguration.class.isAssignableFrom(c); } private static boolean isPreprocessorConfig(Class c) { diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestCase.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestCase.java index b2d76f04a..41afafa4e 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestCase.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestCase.java @@ -21,7 +21,7 @@ package org.deeplearning4j.integration; import lombok.Data; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.evaluation.IEvaluation; import org.nd4j.linalg.api.ndarray.INDArray; @@ -89,7 +89,7 @@ public abstract class TestCase { /** * Required for pretrained models (testType == TestType.PRETRAINED) */ - public Model getPretrainedModel() throws 
Exception { + public IModel getPretrainedModel() throws Exception { throw new RuntimeException("Implementations must override this method if used"); } diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java index e03f2a523..bbe38a662 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/TestUtils.java @@ -22,7 +22,7 @@ package org.deeplearning4j.integration; import org.apache.commons.compress.utils.IOUtils; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.util.ModelSerializer; @@ -48,15 +48,15 @@ public class TestUtils { ByteArrayInputStream bais = new ByteArrayInputStream(bytes); restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); - assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); + assertEquals(net.getConfiguration(), restored.getConfiguration()); assertEquals(net.params(), restored.params()); } catch (IOException e){ //Should never happen throw new RuntimeException(e); } - //Also check the MultiLayerConfiguration is serializable (required by Spark etc) - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); + //Also check the NeuralNetConfiguration is serializable (required by Spark etc) + NeuralNetConfiguration conf = net.getConfiguration(); serializeDeserializeJava(conf); return restored; diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java index d65a0a9cc..ec116ca31 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java @@ -80,12 +80,12 @@ public class CNN1DTestCases { CharacterIterator iter = CharacterIterator.getShakespeareIterator(miniBatchSize,exampleLength); int nOut = iter.totalOutcomes(); - return new NeuralNetConfiguration.Builder() + return ((NeuralNetConfiguration.NeuralNetConfigurationBuilder)NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .seed(12345) .weightInit(WeightInit.XAVIER) .updater(new Adam(0.01)) - .convolutionMode(ConvolutionMode.Same) + .convolutionMode(ConvolutionMode.Same)) .graphBuilder() .addInputs("in") .layer("0", new Convolution1DLayer.Builder().nOut(32).activation(Activation.TANH).kernelSize(3).stride(1).build(), "in") diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java index 3b351e277..4b7b3f7a3 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java @@ -32,7 +32,7 @@ import org.deeplearning4j.datasets.fetchers.DataSetType; import org.deeplearning4j.datasets.iterator.EarlyTerminationDataSetIterator; import 
org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.TinyImageNetDataSetIterator; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -98,13 +98,13 @@ public class CNN2DTestCases { int outputNum = 10; // The number of possible outcomes int seed = 123; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .seed(seed) .l2(0.0005) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) - .list() + .layer(0, new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels) @@ -132,7 +132,7 @@ public class CNN2DTestCases { .nOut(outputNum) .activation(Activation.SOFTMAX) .build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)) //See note below + .inputType(InputType.convolutionalFlat(28, 28, 1)) //See note below .build(); return conf; @@ -207,7 +207,7 @@ public class CNN2DTestCases { } @Override - public Model getPretrainedModel() throws Exception { + public IModel getPretrainedModel() throws Exception { VGG16 vgg16 = VGG16.builder() .seed(12345) .build(); @@ -294,7 +294,7 @@ public class CNN2DTestCases { } @Override - public Model getPretrainedModel() throws Exception { + public IModel getPretrainedModel() throws Exception { int nClasses = 10; int nBoxes = 5; double lambdaNoObj = 0.5; @@ -403,20 +403,20 @@ public class CNN2DTestCases { } @Override - public Model getPretrainedModel() throws Exception { + public IModel getPretrainedModel() throws Exception { Map lrSchedule = new HashMap<>(); lrSchedule.put(0, 0.01); lrSchedule.put(1000, 0.005); lrSchedule.put(3000, 0.001); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .seed(12345) .l2(0.0005) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) - .list() + .layer(0, new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(1) @@ -446,7 +446,7 @@ public class CNN2DTestCases { .nOut(10) .activation(Activation.SOFTMAX) .build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)) //See note below + .inputType(InputType.convolutionalFlat(28, 28, 1)) //See note below .build(); diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN3DTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN3DTestCases.java index f856d5159..157116ba9 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN3DTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN3DTestCases.java @@ -24,7 +24,6 @@ import org.deeplearning4j.datasets.iterator.impl.SingletonMultiDataSetIterator; import org.deeplearning4j.integration.ModelType; import org.deeplearning4j.integration.TestCase; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Convolution3D; @@ -76,13 +75,13 @@ public class CNN3DTestCases { int outputNum = 10; // The number of possible outcomes int seed = 123; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(seed) .l2(0.0005) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) .convolutionMode(ConvolutionMode.Same) - .list() + .layer(new Convolution3D.Builder(3,3,3) .dataFormat(Convolution3D.DataFormat.NCDHW) .nIn(nChannels) @@ -98,7 +97,7 @@ public class CNN3DTestCases { .nOut(outputNum) .activation(Activation.SOFTMAX) .build()) - .setInputType(InputType.convolutional3D(8,8,8,nChannels)) + .inputType(InputType.convolutional3D(8,8,8,nChannels)) .build(); return conf; diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/MLPTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/MLPTestCases.java index 9a58e5138..69e9fa4cd 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/MLPTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/MLPTestCases.java @@ -28,7 +28,6 @@ import org.datavec.api.split.FileSplit; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; import org.deeplearning4j.datasets.iterator.EarlyTerminationDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -93,7 +92,7 @@ public class MLPTestCases { @Override public Object getConfiguration() { - return new NeuralNetConfiguration.Builder() + return NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .seed(12345) .updater(new Adam(new MapSchedule.Builder(ScheduleType.ITERATION) @@ -104,13 +103,13 @@ public class MLPTestCases { .add(14, 1e-2) .build())) .l1(1e-3).l2(1e-3) - .list() + .layer(new DenseLayer.Builder().activation(Activation.TANH).nOut(64).build()) .layer(new OutputLayer.Builder().nOut(10) .lossFunction(LossFunctions.LossFunction.MCXENT) 
.activation(Activation.SOFTMAX) .build()) - .setInputType(InputType.convolutionalFlat(28,28,1)) + .inputType(InputType.convolutionalFlat(28,28,1)) .build(); } @@ -198,11 +197,11 @@ public class MLPTestCases { int numHiddenNodes = 20; //log.info("Build model...."); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .seed(seed) .updater(new Nesterovs(learningRate, 0.9)) - .list() + .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes) .weightInit(WeightInit.XAVIER) .activation(Activation.RELU) diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java index a2cf437fe..edb312c0f 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java @@ -112,20 +112,20 @@ public class RNNTestCases { int lstmLayerSize = 200; //Number of units in each GravesLSTM layer int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates ever 50 characters - return new NeuralNetConfiguration.Builder() + return NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .seed(12345) .l2(0.001) .weightInit(WeightInit.XAVIER) .updater(new Adam(1e-3)) - .list() + .layer(0, new LSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize) .activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize) .activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build()) - .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength) + .backpropType(BackpropType.TruncatedBPTT).tbpttFwdLength(tbpttLength).tbpttBackLength(tbpttLength) .build(); } @@ -195,19 +195,19 @@ public class RNNTestCases { @Override public Object getConfiguration() throws Exception { - return new NeuralNetConfiguration.Builder() + return NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .seed(12345) .updater(new Adam(5e-2)) .l1(1e-3).l2(1e-3) - .list() + .layer(0, new LSTM.Builder().activation(Activation.TANH).nOut(10).build()) .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build()) .layer(new OutputLayer.Builder().nOut(6) .lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .build()) - .setInputType(InputType.recurrent(1)) + .inputType(InputType.recurrent(1)) .build(); } @@ -316,19 +316,19 @@ public class RNNTestCases { @Override public Object getConfiguration() throws Exception { - return new NeuralNetConfiguration.Builder() + return NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .seed(12345) .updater(new Adam(5e-2)) .l1(1e-3).l2(1e-3) - .list() + .layer(0, new Bidirectional(new LSTM.Builder().activation(Activation.TANH).nOut(10).build())) .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build()) .layer(new OutputLayer.Builder().nOut(6) .lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .build()) - .setInputType(InputType.recurrent(1)) + .inputType(InputType.recurrent(1)) .build(); } diff --git 
a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/UnsupervisedTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/UnsupervisedTestCases.java index 574e3be2d..84b60ffd6 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/UnsupervisedTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/UnsupervisedTestCases.java @@ -72,13 +72,13 @@ public class UnsupervisedTestCases { @Override public Object getConfiguration() { - return new NeuralNetConfiguration.Builder() + return NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .seed(12345) .updater(new Adam(1e-3)) .weightInit(WeightInit.XAVIER) .l2(1e-4) - .list() + .layer(0, new VariationalAutoencoder.Builder() .activation(Activation.TANH) .encoderLayerSizes(256, 256) //2 encoder layers, each of size 256 diff --git a/build.gradle b/build.gradle index 20e45b528..a3a070c2b 100644 --- a/build.gradle +++ b/build.gradle @@ -66,9 +66,9 @@ allprojects { Project proj -> plugins.withType(JavaPlugin) { sourceCompatibility = JavaVersion.VERSION_11 - targetCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_11 tasks.withType(JavaCompile) { - options.release = 8 + options.release = 11 } dependencies { @@ -86,7 +86,6 @@ allprojects { Project proj -> testImplementation 'org.junit.jupiter:junit-jupiter-engine' testImplementation 'org.junit.jupiter:junit-jupiter-api' testImplementation 'org.junit.jupiter:junit-jupiter-params' - implementation "org.slf4j:slf4j-api" implementation "org.slf4j:slf4j-simple" diff --git a/cavis-dnn/cavis-dnn-core/src/main/java/net/brutex/ai/dnn/core/util/ANSI.java b/cavis-dnn/cavis-dnn-core/src/main/java/net/brutex/ai/dnn/core/util/ANSI.java new file mode 100644 index 000000000..bd2247445 --- /dev/null +++ b/cavis-dnn/cavis-dnn-core/src/main/java/net/brutex/ai/dnn/core/util/ANSI.java @@ -0,0 +1,52 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. 
+ * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.core.util; + +/** + * ANSI colour codes + */ +public enum ANSI { + BLACK("\u001B[30m"), + RED("\u001B[31m"), + GREEN("\u001B[32m"), + YELLOW("\u001B[33m"), + BLUE("\u001B[34m"), + PURPLE("\u001B[35m"), + CYAN("\u001B[36m"), + WHITE("\u001B[37m"), + + ANSI_RESET("\u001B[0m"), + + BLACK_BACKGROUND("\u001B[40m"), + RED_BACKGROUND("\u001B[41m"), + GREEN_BACKGROUND("\u001B[42m"), + YELLOW_BACKGROUND("\u001B[43m"), + BLUE_BACKGROUND("\u001B[44m"), + PURPLE_BACKGROUND("\u001B[45m"), + CYAN_BACKGROUND("\u001B[46m"), + WHITE_BACKGROUND("\u001B[47m"); + + String code; + ANSI(String code) { + this.code = code; + } +} diff --git a/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/listener/SystemInfoFilePrintListener.java b/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/listener/SystemInfoFilePrintListener.java index 88f8a2bd8..d9e3d7b6f 100644 --- a/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/listener/SystemInfoFilePrintListener.java +++ b/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/listener/SystemInfoFilePrintListener.java @@ -23,8 +23,8 @@ package org.deeplearning4j.core.listener; import lombok.NonNull; import lombok.Builder; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.io.FileUtils; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.ndarray.INDArray; import oshi.json.SystemInfo; @@ -56,12 +56,12 @@ public class SystemInfoFilePrintListener implements TrainingListener { } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { } @Override - public void onEpochStart(Model model) { + public void onEpochStart(IModel model) { if(!printOnEpochStart || printFileTarget == null) return; @@ -70,7 +70,7 @@ public class SystemInfoFilePrintListener implements TrainingListener { } @Override - public void onEpochEnd(Model model) { + public void onEpochEnd(IModel model) { if(!printOnEpochEnd || printFileTarget == null) return; @@ -79,7 +79,7 @@ public class SystemInfoFilePrintListener implements TrainingListener { } @Override - public void onForwardPass(Model model, List activations) { + public void onForwardPass(IModel model, List activations) { if(!printOnBackwardPass || printFileTarget == null) return; @@ -88,7 +88,7 @@ public class SystemInfoFilePrintListener implements TrainingListener { } @Override - public void onForwardPass(Model model, Map activations) { + public void onForwardPass(IModel model, Map activations) { if(!printOnForwardPass || printFileTarget == null) return; @@ -97,7 +97,7 @@ public class SystemInfoFilePrintListener implements TrainingListener { } @Override - public void onGradientCalculation(Model model) { + public void onGradientCalculation(IModel model) { if(!printOnGradientCalculation || printFileTarget == null) return; @@ -107,7 +107,7 @@ public class SystemInfoFilePrintListener implements TrainingListener { } @Override - public void onBackwardPass(Model model) { + public void onBackwardPass(IModel model) { if(!printOnBackwardPass || printFileTarget == null) return; diff --git a/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/listener/SystemInfoPrintListener.java 
b/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/listener/SystemInfoPrintListener.java index 5b115d542..e4bdfcda6 100644 --- a/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/listener/SystemInfoPrintListener.java +++ b/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/listener/SystemInfoPrintListener.java @@ -22,7 +22,7 @@ package org.deeplearning4j.core.listener; import lombok.Builder; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.ndarray.INDArray; import oshi.json.SystemInfo; @@ -49,12 +49,12 @@ public class SystemInfoPrintListener implements TrainingListener { private static final String SYSTEM_INFO = "System info on epoch end: "; @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { } @Override - public void onEpochStart(Model model) { + public void onEpochStart(IModel model) { if(!printOnEpochStart) return; @@ -64,7 +64,7 @@ public class SystemInfoPrintListener implements TrainingListener { } @Override - public void onEpochEnd(Model model) { + public void onEpochEnd(IModel model) { if(!printOnEpochEnd) return; @@ -74,7 +74,7 @@ public class SystemInfoPrintListener implements TrainingListener { } @Override - public void onForwardPass(Model model, List activations) { + public void onForwardPass(IModel model, List activations) { if(!printOnBackwardPass) return; @@ -84,7 +84,7 @@ public class SystemInfoPrintListener implements TrainingListener { } @Override - public void onForwardPass(Model model, Map activations) { + public void onForwardPass(IModel model, Map activations) { if(!printOnForwardPass) return; @@ -94,7 +94,7 @@ public class SystemInfoPrintListener implements TrainingListener { } @Override - public void onGradientCalculation(Model model) { + public void onGradientCalculation(IModel model) { if(!printOnGradientCalculation) return; @@ -104,7 +104,7 @@ public class SystemInfoPrintListener implements TrainingListener { } @Override - public void onBackwardPass(Model model) { + public void onBackwardPass(IModel model) { if(!printOnBackwardPass) return; SystemInfo systemInfo = new SystemInfo(); diff --git a/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/util/ModelGuesser.java b/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/util/ModelGuesser.java index 70b250978..3ab6eec8f 100644 --- a/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/util/ModelGuesser.java +++ b/cavis-dnn/cavis-dnn-core/src/main/java/org/deeplearning4j/core/util/ModelGuesser.java @@ -21,13 +21,13 @@ package org.deeplearning4j.core.util; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.deeplearning4j.common.util.DL4JFileUtils; import org.deeplearning4j.common.config.DL4JSystemProperties; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.modelimport.keras.KerasModelImport; import org.deeplearning4j.util.ModelSerializer; import org.nd4j.linalg.dataset.api.preprocessor.Normalizer; @@ -80,7 +80,7 @@ public class ModelGuesser { //note here that we load json BEFORE YAML. 
YAML //turns out to load just fine *accidentally* try { - return MultiLayerConfiguration.fromJson(input); + return NeuralNetConfiguration.fromJson(input); } catch (Exception e) { log.warn("Tried multi layer config from json", e); try { @@ -96,7 +96,7 @@ public class ModelGuesser { } catch (Exception e3) { log.warn("Tried computation graph from json"); try { - return MultiLayerConfiguration.fromYaml(input); + return NeuralNetConfiguration.fromYaml(input); } catch (Exception e4) { log.warn("Tried multi layer configuration from yaml"); try { @@ -142,7 +142,7 @@ public class ModelGuesser { * @return the loaded model * @throws Exception */ - public static Model loadModelGuess(String path) throws Exception { + public static IModel loadModelGuess(String path) throws Exception { try { return ModelSerializer.restoreMultiLayerNetwork(new File(path), true); } catch (Exception e) { @@ -185,7 +185,7 @@ public class ModelGuesser { * @return the loaded model * @throws Exception */ - public static Model loadModelGuess(InputStream stream) throws Exception { + public static IModel loadModelGuess(InputStream stream) throws Exception { return loadModelGuess(stream, null); } @@ -194,7 +194,7 @@ public class ModelGuesser { * @param stream Stream of the model file * @param tempDirectory Temporary/working directory. May be null. */ - public static Model loadModelGuess(InputStream stream, File tempDirectory) throws Exception { + public static IModel loadModelGuess(InputStream stream, File tempDirectory) throws Exception { //Currently (Nov 2017): KerasModelImport doesn't support loading from input streams //Simplest solution here: write to a temporary file File f; diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java index cc1220762..8da3ff4e5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java @@ -26,6 +26,7 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.common.config.DL4JClassLoading; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.listeners.CollectScoresListener; @@ -99,7 +100,7 @@ public class LayerHelperValidationUtil { //Don't allow fallback: for(Layer l : netOrig.getLayers()){ - org.deeplearning4j.nn.conf.layers.Layer lConf = l.conf().getLayer(); + LayerConfiguration lConf = l.getLayerConfiguration(); if(lConf instanceof ConvolutionLayer){ ((ConvolutionLayer) lConf).setCudnnAllowFallback(false); } else if(lConf instanceof SubsamplingLayer){ @@ -108,12 +109,12 @@ public class LayerHelperValidationUtil { } - MultiLayerNetwork net1NoHelper = new MultiLayerNetwork(netOrig.getLayerWiseConfigurations().clone()); + MultiLayerNetwork net1NoHelper = new MultiLayerNetwork(netOrig.getConfiguration().clone()); net1NoHelper.init(); log.info("Removing all layer helpers from network copy 1"); removeHelpers(net1NoHelper.getLayers(), null); - MultiLayerNetwork net2With = new MultiLayerNetwork(netOrig.getLayerWiseConfigurations().clone()); + MultiLayerNetwork net2With = new MultiLayerNetwork(netOrig.getConfiguration().clone()); net2With.init(); 
net2With.params().assign(netOrig.params()); log.info("Removing all except for specified helpers from network copy 2: " + t.getAllowHelpersForClasses()); @@ -133,7 +134,7 @@ public class LayerHelperValidationUtil { enableCppHelpers(); } List ff2 = net2With.feedForward(t.getFeatures(), train); - List paramKeys = new ArrayList<>(net1NoHelper.paramTable().keySet()); + List paramKeys = new ArrayList<>(net1NoHelper.getParamTable().keySet()); Collections.sort(paramKeys); for (String p : paramKeys) { INDArray p1 = net1NoHelper.getParam(p); @@ -224,7 +225,7 @@ public class LayerHelperValidationUtil { } net2With.computeGradientAndScore(); - List paramKeys = new ArrayList<>(net1NoHelper.paramTable().keySet()); + List paramKeys = new ArrayList<>(net1NoHelper.getParamTable().keySet()); Collections.sort(paramKeys); for(String p : paramKeys){ INDArray g1 = net1NoHelper.gradient().gradientForVariable().get(p); @@ -252,7 +253,7 @@ public class LayerHelperValidationUtil { Preconditions.checkNotNull(t.getData(), "DataSetIterator is not set (null)"); log.info("Testing run-to-run consistency of training with layer helper"); - net2With = new MultiLayerNetwork(netOrig.getLayerWiseConfigurations().clone()); + net2With = new MultiLayerNetwork(netOrig.getConfiguration().clone()); net2With.init(); net2With.params().assign(netOrig.params()); log.info("Removing all except for specified layer helpers from network copy 2: " + t.getAllowHelpersForClasses()); @@ -264,7 +265,7 @@ public class LayerHelperValidationUtil { for( int i=0; i<2; i++ ) { - net2With = new MultiLayerNetwork(netOrig.getLayerWiseConfigurations().clone()); + net2With = new MultiLayerNetwork(netOrig.getConfiguration().clone()); net2With.init(); net2With.params().assign(netOrig.params()); log.info("Removing all except for specified layer helpers from network copy 2: " + t.getAllowHelpersForClasses()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/RandomTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/RandomTests.java index 63b13e660..d939dab81 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/RandomTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/RandomTests.java @@ -23,19 +23,15 @@ package org.deeplearning4j; import org.deeplearning4j.datasets.iterator.EarlyTerminationDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.jupiter.api.Test; -import org.nd4j.common.resources.Resources; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.RmsProp; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.nio.file.Files; import java.util.concurrent.CountDownLatch; //@Ignore @@ -44,8 +40,8 @@ public class RandomTests extends BaseDL4JTest { @Test public void testReproduce() throws Exception { - final MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() + final NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .layer(0, new 
org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(10) .activation(Activation.TANH).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java index cecc969ac..6e4456ef2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java @@ -23,7 +23,7 @@ package org.deeplearning4j; import org.apache.commons.compress.utils.IOUtils; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; @@ -66,15 +66,15 @@ public class TestUtils { ByteArrayInputStream bais = new ByteArrayInputStream(bytes); restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); - assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); + assertEquals(net.getConfiguration(), restored.getConfiguration()); assertEquals(net.params(), restored.params()); } catch (IOException e){ //Should never happen throw new RuntimeException(e); } - //Also check the MultiLayerConfiguration is serializable (required by Spark etc) - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); + //Also check the NeuralNetConfiguration is serializable (required by Spark etc) + NeuralNetConfiguration conf = net.getConfiguration(); serializeDeserializeJava(conf); return restored; @@ -317,14 +317,14 @@ public class TestUtils { for(Layer l : layers){ //Don't use instanceof here - there are sub conv subclasses if(l.getClass() == ConvolutionLayer.class || l instanceof SubsamplingLayer || l instanceof BatchNormalization || l instanceof LSTM){ - Preconditions.checkNotNull(l.getHelper(), l.conf().getLayer().getLayerName()); + Preconditions.checkNotNull(l.getHelper(), l.getLayerConfiguration().getLayerName()); } } } public static void assertHelpersAbsent(Layer[] layers) throws Exception { for(Layer l : layers){ - Preconditions.checkState(l.getHelper() == null, l.conf().getLayer().getLayerName()); + Preconditions.checkState(l.getHelper() == null, l.getLayerConfiguration().getLayerName()); } } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java index dc9b3ffcf..f391f35f9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java @@ -31,8 +31,8 @@ import org.deeplearning4j.datasets.iterator.impl.*; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; 
import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -168,9 +168,9 @@ public class DataSetIteratorTest extends BaseDL4JTest { LFWDataSetIterator lfw = new LFWDataSetIterator(batchSize, numSamples, new int[] {numRows, numColumns, numChannels}, outputNum, false, true, 1.0, new Random(seed)); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(numChannels).nOut(6) .weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) @@ -178,8 +178,7 @@ public class DataSetIteratorTest extends BaseDL4JTest { .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .build()) - .setInputType(InputType.convolutionalFlat(numRows, numColumns, numChannels)) - ; + .inputType(InputType.convolutionalFlat(numRows, numColumns, numChannels)); MultiLayerNetwork model = new MultiLayerNetwork(builder.build()); model.init(); @@ -229,9 +228,9 @@ public class DataSetIteratorTest extends BaseDL4JTest { Cifar10DataSetIterator cifar = new Cifar10DataSetIterator(batchSize); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(channels).nOut(6).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) @@ -240,7 +239,7 @@ public class DataSetIteratorTest extends BaseDL4JTest { .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .build()) - .setInputType(InputType.convolutionalFlat(height, width, channels)); + .inputType(InputType.convolutionalFlat(height, width, channels)); MultiLayerNetwork model = new MultiLayerNetwork(builder.build()); model.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java index 13ae46efb..12e17fa3a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java @@ -38,10 +38,9 @@ import org.deeplearning4j.earlystopping.scorecalc.*; import org.deeplearning4j.earlystopping.termination.*; import org.deeplearning4j.earlystopping.trainer.EarlyStoppingTrainer; import org.deeplearning4j.earlystopping.trainer.IEarlyStoppingTrainer; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.GradientNormalization; -import 
org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.variational.BernoulliReconstructionDistribution; @@ -133,9 +132,9 @@ public class TestEarlyStopping extends BaseDL4JTest { String msg = i + " - " + sc.getClass().getSimpleName(); log.info("Starting test - {}", msg); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) - .updater(new Sgd(0.5)).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.5)).weightInit(WeightInit.XAVIER) .layer(new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()) .layer(new OutputLayer.Builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) @@ -219,9 +218,9 @@ public class TestEarlyStopping extends BaseDL4JTest { @Test public void testEarlyStoppingEveryNEpoch() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new Sgd(0.01)).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.01)).weightInit(WeightInit.XAVIER) .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -248,9 +247,9 @@ public class TestEarlyStopping extends BaseDL4JTest { @Test public void testEarlyStoppingIrisMultiEpoch() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER) .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -297,10 +296,10 @@ public class TestEarlyStopping extends BaseDL4JTest { //Test poor tuning (high LR): should terminate on MaxScoreIterationTerminationCondition Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(5.0)) //Intentionally huge LR - .weightInit(WeightInit.XAVIER).list() + .weightInit(WeightInit.XAVIER) .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -336,9 +335,9 @@ public class TestEarlyStopping extends BaseDL4JTest { //test termination after max time Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER) .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -379,9 +378,9 @@ public class TestEarlyStopping extends BaseDL4JTest { //Simulate this by setting LR = 0.0 Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new 
NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER) .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -419,9 +418,9 @@ public class TestEarlyStopping extends BaseDL4JTest { //Simulate this by setting LR = 0.0 Random rng = new Random(123); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new Nesterovs(0.0,0.9)).list() + .updater(new Nesterovs(0.0,0.9)) .layer(0, new DenseLayer.Builder().nIn(1).nOut(20) .weightInit(WeightInit.XAVIER).activation( Activation.TANH) @@ -466,9 +465,9 @@ public class TestEarlyStopping extends BaseDL4JTest { @Test public void testEarlyStoppingGetBestModel() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER) .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -496,17 +495,17 @@ public class TestEarlyStopping extends BaseDL4JTest { MultiLayerNetwork mln = result.getBestModel(); assertEquals(net.getnLayers(), mln.getnLayers()); - assertEquals(net.conf().getOptimizationAlgo(), mln.conf().getOptimizationAlgo()); - BaseLayer bl = (BaseLayer) net.conf().getLayer(); - assertEquals(bl.getActivationFn().toString(), ((BaseLayer) mln.conf().getLayer()).getActivationFn().toString()); - assertEquals(bl.getIUpdater(), ((BaseLayer) mln.conf().getLayer()).getIUpdater()); + assertEquals(net.getNetConfiguration().getOptimizationAlgo(), mln.getNetConfiguration().getOptimizationAlgo()); + BaseLayer bl = (BaseLayer) net.getLayerConfiguration(); + assertEquals(bl.getActivationFn().toString(), ((BaseLayer) mln.getLayerConfiguration()).getActivationFn().toString()); + assertEquals(bl.getIUpdater(), ((BaseLayer) mln.getLayerConfiguration()).getIUpdater()); } @Test public void testListeners() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER) .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -569,8 +568,8 @@ public class TestEarlyStopping extends BaseDL4JTest { Metric.MAE}) { log.info("Metric: " + metric); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .layer(new DenseLayer.Builder().nIn(784).nOut(32).build()) .layer(new OutputLayer.Builder().nIn(32).nOut(784).activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); @@ -612,8 +611,8 @@ public class TestEarlyStopping extends 
BaseDL4JTest { Metric.MAE}) { log.info("Metric: " + metric); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .layer(new AutoEncoder.Builder().nIn(784).nOut(32).build()) .build(); @@ -655,8 +654,8 @@ public class TestEarlyStopping extends BaseDL4JTest { Metric.MAE}) { log.info("Metric: " + metric); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .layer(new VariationalAutoencoder.Builder() .nIn(784).nOut(32) .encoderLayerSizes(64) @@ -700,8 +699,8 @@ public class TestEarlyStopping extends BaseDL4JTest { for(boolean logProb : new boolean[]{false, true}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .layer(new VariationalAutoencoder.Builder() .nIn(784).nOut(32) .encoderLayerSizes(64) @@ -747,8 +746,8 @@ public class TestEarlyStopping extends BaseDL4JTest { for(Evaluation.Metric metric : Evaluation.Metric.values()) { log.info("Metric: " + metric); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .layer(new DenseLayer.Builder().nIn(784).nOut(32).build()) .layer(new OutputLayer.Builder().nIn(32).nOut(10).activation(Activation.SOFTMAX).build()) .build(); @@ -784,8 +783,8 @@ public class TestEarlyStopping extends BaseDL4JTest { @Test public void testEarlyStoppingListeners() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER) .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -827,19 +826,19 @@ public class TestEarlyStopping extends BaseDL4JTest { private int maxEpochEnd = -1; @Override - public void onEpochStart(Model model){ + public void onEpochStart(IModel model){ countEpochStart++; maxEpochStart = Math.max(maxEpochStart, BaseOptimizer.getEpochCount(model)); } @Override - public void onEpochEnd(Model model){ + public void onEpochEnd(IModel model){ countEpochEnd++; maxEpochEnd = Math.max(maxEpochEnd, BaseOptimizer.getEpochCount(model)); } @Override - public void iterationDone(Model model, int iteration, int epoch){ + public void iterationDone(IModel model, int iteration, int epoch){ iterCount++; } @@ -859,7 +858,7 @@ public class TestEarlyStopping extends BaseDL4JTest { DataSetIterator test = new SingletonDataSetIterator(ds); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(123) .weightInit(WeightInit.XAVIER) .updater(new Adam(0.1)) @@ -868,7 +867,7 @@ public class TestEarlyStopping extends BaseDL4JTest { .gradientNormalization(GradientNormalization .ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(1.0) - .list() + .layer(0, new LSTM.Builder() .nIn(10) .nOut(10) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java index 4209f8dd3..fb55e2957 100644 --- 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java @@ -76,7 +76,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { @Test public void testEarlyStoppingIris() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) @@ -120,7 +120,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { //Test poor tuning (high LR): should terminate on MaxScoreIterationTerminationCondition Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(5.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") @@ -156,7 +156,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { //test termination after max time Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") @@ -198,7 +198,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { //Simulate this by setting LR = 0.0 Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") @@ -233,7 +233,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { @Test public void testListeners() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) @@ -297,7 +297,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { Metric.MAE}) { log.info("Metric: " + metric); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("0", new DenseLayer.Builder().nIn(784).nOut(32).build(), "in") @@ -343,7 +343,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { Metric.MAE}) { log.info("Metric: " + metric); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("0", new AutoEncoder.Builder().nIn(784).nOut(32).build(), "in") @@ -388,7 +388,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { Metric.MAE}) { 
log.info("Metric: " + metric); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("0", new VariationalAutoencoder.Builder() @@ -435,7 +435,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { for(boolean logProb : new boolean[]{false, true}) { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1e-5)) .graphBuilder() .addInputs("in") @@ -486,7 +486,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { for(Evaluation.Metric metric : Evaluation.Metric.values()) { log.info("Metric: " + metric); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("0", new DenseLayer.Builder().nIn(784).nOut(32).build(), "in") @@ -526,7 +526,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { @Test public void testEarlyStoppingListenersCG() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER) .graphBuilder() .addInputs("in") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java index 7b44d26c9..8f69cf1d9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java @@ -69,7 +69,7 @@ public class EvalTest extends BaseDL4JTest { public void testIris() { // Network config - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(42) .updater(new Sgd(1e-6)).list() @@ -177,7 +177,7 @@ public class EvalTest extends BaseDL4JTest { rrdsi.reset(); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) .list() .layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -295,7 +295,7 @@ public class EvalTest extends BaseDL4JTest { int tbpttLength = 10; int tsLength = 5 * tbpttLength + tbpttLength / 2; - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() .seed(12345) .trainingWorkspaceMode(ws) .inferenceWorkspaceMode(ws) @@ -306,7 +306,7 @@ public class EvalTest extends BaseDL4JTest { .build()) .build(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .seed(12345) .trainingWorkspaceMode(ws) .inferenceWorkspaceMode(ws) @@ -314,7 +314,7 @@ public class EvalTest extends BaseDL4JTest { .layer(new LSTM.Builder().nIn(nIn).nOut(layerSize).build()) .layer(new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX).build()) - .tBPTTLength(10) + .tbpttFwdLength(10).tbpttBackLength(10) .backpropType(BackpropType.TruncatedBPTT) .build(); @@ -371,7 +371,7 @@ public 
class EvalTest extends BaseDL4JTest { int tbpttLength = 10; int tsLength = 5 * tbpttLength + tbpttLength / 2; - ComputationGraphConfiguration conf1 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder() .seed(12345) .trainingWorkspaceMode(ws) .inferenceWorkspaceMode(ws) @@ -384,7 +384,7 @@ public class EvalTest extends BaseDL4JTest { .setOutputs("1") .build(); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .seed(12345) .trainingWorkspaceMode(ws) .inferenceWorkspaceMode(ws) @@ -455,12 +455,12 @@ public class EvalTest extends BaseDL4JTest { DataSetIterator testData = new SequenceRecordReaderDataSetIterator(fsr, lsr, 1, -1, true, SequenceRecordReaderDataSetIterator.AlignmentMode.ALIGN_END); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .list() .layer(0, new LSTM.Builder().activation(Activation.TANH).nIn(3).nOut(3).build()) .layer(1, new RnnOutputLayer.Builder().activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.XENT) .nIn(3).nOut(1).build()) - .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(10).tBPTTBackwardLength(10) + .backpropType(BackpropType.TruncatedBPTT).tbpttFwdLength(10).tbpttBackLength(10) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -473,7 +473,7 @@ public class EvalTest extends BaseDL4JTest { //Sanity check: https://github.com/eclipse/deeplearning4j/issues/5351 // Network config - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(42) .updater(new Sgd(1e-6)).list() @@ -503,7 +503,7 @@ public class EvalTest extends BaseDL4JTest { public void testMultiOutputEvalSimple(){ Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .graphBuilder() .addInputs("in") @@ -538,7 +538,7 @@ public class EvalTest extends BaseDL4JTest { public void testMultiOutputEvalCG(){ //Simple sanity check on evaluation - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("0", new EmbeddingSequenceLayer.Builder().nIn(10).nOut(10).build(), "in") @@ -566,7 +566,7 @@ public class EvalTest extends BaseDL4JTest { @Test public void testInvalidEvaluation(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new DenseLayer.Builder().nIn(4).nOut(10).build()) @@ -622,7 +622,7 @@ public class EvalTest extends BaseDL4JTest { //Disable validation, and check same thing: - net.getLayerWiseConfigurations().setValidateOutputLayerConfig(false); + net.getConfiguration().setValidateOutputLayerConfig(false); net.evaluate(iter); net.evaluateROCMultiClass(iter, 0); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java index 70271cd95..aa9b2686f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java @@ -23,7 +23,6 @@ package org.deeplearning4j.eval; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.core.evaluation.EvaluationTools; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -48,7 +47,7 @@ public class EvaluationToolsTests extends BaseDL4JTest { DataSetIterator iter = new IrisDataSetIterator(150, 150); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()).layer(1, new OutputLayer.Builder().nIn(4).nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -90,7 +89,7 @@ public class EvaluationToolsTests extends BaseDL4JTest { public void testRocMultiToHtml() throws Exception { DataSetIterator iter = new IrisDataSetIterator(150, 150); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()).layer(1, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/ROCTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/ROCTest.java index 5684a76d6..ca3ad1b54 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/ROCTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/ROCTest.java @@ -22,23 +22,19 @@ package org.deeplearning4j.eval; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.junit.jupiter.api.Test; -import org.nd4j.evaluation.curves.PrecisionRecallCurve; import org.nd4j.evaluation.curves.RocCurve; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.random.impl.BernoulliDistribution; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.*; @@ -86,7 +82,7 @@ public class ROCTest extends BaseDL4JTest { DataSetIterator iter = new IrisDataSetIterator(150, 150); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).seed(12345) + NeuralNetConfiguration conf = 
NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).seed(12345) .list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()).layer(1, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/RegressionEvalTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/RegressionEvalTest.java index b5e2b994e..92991d1cc 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/RegressionEvalTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/RegressionEvalTest.java @@ -23,7 +23,6 @@ package org.deeplearning4j.eval; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.ExistingDataSetIterator; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -49,7 +48,7 @@ public class RegressionEvalTest extends BaseDL4JTest { public void testRegressionEvalMethods() { //Basic sanity check - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.ZERO).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.ZERO).list() .layer(0, new OutputLayer.Builder().activation(Activation.TANH) .lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(5).build()) .build(); @@ -71,7 +70,7 @@ public class RegressionEvalTest extends BaseDL4JTest { ComputationGraphConfiguration graphConf = - new NeuralNetConfiguration.Builder().weightInit(WeightInit.ZERO).graphBuilder() + NeuralNetConfiguration.builder().weightInit(WeightInit.ZERO).graphBuilder() .addInputs("in").addLayer("0", new OutputLayer.Builder() .lossFunction(LossFunctions.LossFunction.MSE) .activation(Activation.TANH).nIn(10).nOut(5).build(), "in") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java index 0a09599bb..be9568f89 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java @@ -24,7 +24,6 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.exception.DL4JException; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -41,7 +40,7 @@ import static org.junit.jupiter.api.Assertions.fail; public class TestInvalidConfigurations extends BaseDL4JTest { public static MultiLayerNetwork getDensePlusOutput(int nIn, int nOut) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(10).build()) .layer(1, new OutputLayer.Builder().nIn(10).nOut(nOut).build()).build(); @@ -52,7 +51,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest { } public static MultiLayerNetwork getLSTMPlusRnnOutput(int nIn, int nOut) { - 
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(10).build()) .layer(1, new RnnOutputLayer.Builder().nIn(10).nOut(nOut).build()).build(); @@ -63,10 +62,10 @@ public class TestInvalidConfigurations extends BaseDL4JTest { } public static MultiLayerNetwork getCnnPlusOutputLayer(int depthIn, int inH, int inW, int nOut) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(5).build()) .layer(1, new OutputLayer.Builder().nOut(nOut).build()) - .setInputType(InputType.convolutional(inH, inW, depthIn)).build(); + .inputType(InputType.convolutional(inH, inW, depthIn)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -90,7 +89,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest { @Test public void testDenseNout0() { try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(0).build()) .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).build()).build(); @@ -147,7 +146,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest { @Test public void testLSTMNOut0() { try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new GravesLSTM.Builder().nIn(10).nOut(0).build()) .layer(1, new RnnOutputLayer.Builder().nIn(10).nOut(10).build()).build(); @@ -178,10 +177,10 @@ public class TestInvalidConfigurations extends BaseDL4JTest { @Test public void testConvolutionalNOut0() { try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new ConvolutionLayer.Builder().nIn(5).nOut(0).build()) .layer(1, new OutputLayer.Builder().nOut(10).build()) - .setInputType(InputType.convolutional(10, 10, 5)).build(); + .inputType(InputType.convolutional(10, 10, 5)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -207,12 +206,12 @@ public class TestInvalidConfigurations extends BaseDL4JTest { //(10-3+2*0)/2+1 = 7/2 + 1 try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().convolutionMode(ConvolutionMode.Strict) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().convolutionMode(ConvolutionMode.Strict) .list() .layer(0, new ConvolutionLayer.Builder().kernelSize(3, 2).stride(2, 2).padding(0, 0).nOut(5) .build()) .layer(1, new OutputLayer.Builder().nOut(10).build()) - .setInputType(InputType.convolutional(hIn, wIn, depthIn)).build(); + .inputType(InputType.convolutional(hIn, wIn, depthIn)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -234,11 +233,11 @@ public class TestInvalidConfigurations extends BaseDL4JTest { int hIn = 10; int wIn = 10; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new ConvolutionLayer.Builder().kernelSize(7, 7).stride(1, 1).padding(0, 0).nOut(5) .build()) .layer(1, new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(hIn, wIn, depthIn)).build(); 
+ .inputType(InputType.convolutional(hIn, wIn, depthIn)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -265,8 +264,8 @@ public class TestInvalidConfigurations extends BaseDL4JTest { //Invalid: (10-3+0)/2+1 = 4.5 - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().convolutionMode(ConvolutionMode.Strict).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().convolutionMode(ConvolutionMode.Strict).list() .layer(0, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(2, 2) .padding(0, 0).nIn(depthIn).nOut(5).build()) .layer(1, new OutputLayer.Builder().nIn(5 * 4 * 4).nOut(10).activation(Activation.SOFTMAX).build()) @@ -299,22 +298,22 @@ public class TestInvalidConfigurations extends BaseDL4JTest { //(10-3+2*0)/2+1 = 7/2 + 1 try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 3).stride(2, 2).padding(0, 0).nOut(5) .build()) .layer(1, new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(hIn, wIn, depthIn)).build(); + .inputType(InputType.convolutional(hIn, wIn, depthIn)).build(); } catch (Exception e) { fail("Did not expect exception with default (truncate)"); } try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().convolutionMode(ConvolutionMode.Strict) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().convolutionMode(ConvolutionMode.Strict) .list() .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 3).stride(2, 2).padding(0, 0).nOut(5) .build()) .layer(1, new OutputLayer.Builder().nOut(10).build()) - .setInputType(InputType.convolutional(hIn, wIn, depthIn)).build(); + .inputType(InputType.convolutional(hIn, wIn, depthIn)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -338,12 +337,12 @@ public class TestInvalidConfigurations extends BaseDL4JTest { //(10-3+2*0)/2+1 = 7/2 + 1 try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().convolutionMode(ConvolutionMode.Strict) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().convolutionMode(ConvolutionMode.Strict) .list() .layer(0, new SubsamplingLayer.Builder().kernelSize(2, 3).stride(2, 2).padding(0, 0) .build()) .layer(1, new OutputLayer.Builder().nOut(10).build()) - .setInputType(InputType.convolutional(hIn, wIn, depthIn)).build(); + .inputType(InputType.convolutional(hIn, wIn, depthIn)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidInput.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidInput.java index 7d958355a..4e35f44eb 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidInput.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidInput.java @@ -23,7 +23,6 @@ package org.deeplearning4j.exceptions; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.exception.DL4JException; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -43,7 +42,7 @@ public class TestInvalidInput extends BaseDL4JTest { @Test public void 
testInputNinMismatchDense() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()).build(); @@ -64,7 +63,7 @@ public class TestInvalidInput extends BaseDL4JTest { @Test public void testLabelsNOutMismatchOutputLayer() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()).build(); @@ -85,7 +84,7 @@ public class TestInvalidInput extends BaseDL4JTest { @Test public void testLabelsNOutMismatchRnnOutputLayer() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new LSTM.Builder().nIn(5).nOut(5).build()) .layer(1, new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); @@ -112,10 +111,10 @@ public class TestInvalidInput extends BaseDL4JTest { int w = 16; int d = 3; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new ConvolutionLayer.Builder().nIn(d).nOut(5).build()) .layer(1, new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(h, w, d)).build(); + .inputType(InputType.convolutional(h, w, d)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -139,10 +138,10 @@ public class TestInvalidInput extends BaseDL4JTest { int w = 16; int d = 3; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new ConvolutionLayer.Builder().nIn(d).nOut(5).build()) .layer(1, new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(h, w, d)).build(); + .inputType(InputType.convolutional(h, w, d)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -165,10 +164,10 @@ public class TestInvalidInput extends BaseDL4JTest { int w = 16; int d = 3; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new SubsamplingLayer.Builder().kernelSize(2, 2).build()) .layer(1, new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(h, w, d)).build(); + .inputType(InputType.convolutional(h, w, d)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -188,7 +187,7 @@ public class TestInvalidInput extends BaseDL4JTest { @Test public void testInputNinMismatchLSTM() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new GravesLSTM.Builder().nIn(5).nOut(5).build()) .layer(1, new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); @@ -209,7 +208,7 @@ public class TestInvalidInput extends BaseDL4JTest { @Test public void testInputNinMismatchBidirectionalLSTM() { - MultiLayerConfiguration conf = new 
NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new GravesBidirectionalLSTM.Builder().nIn(5).nOut(5).build()) .layer(1, new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); @@ -231,7 +230,7 @@ public class TestInvalidInput extends BaseDL4JTest { @Test public void testInputNinMismatchEmbeddingLayer() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new EmbeddingLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()).build(); @@ -257,7 +256,7 @@ public class TestInvalidInput extends BaseDL4JTest { for(String layerType : new String[]{"simple", "lstm", "graves"}) { - Layer l; + LayerConfiguration l; switch (layerType){ case "simple": l = new SimpleRnn.Builder().nIn(5).nOut(5).build(); @@ -272,7 +271,7 @@ public class TestInvalidInput extends BaseDL4JTest { throw new RuntimeException(); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(l) .layer(new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java index e375aa180..b83cc07c4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java @@ -23,7 +23,6 @@ package org.deeplearning4j.gradientcheck; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.AttentionVertex; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -84,7 +83,7 @@ public class AttentionLayerTest extends BaseDL4JTest { System.out.println("Starting test: " + name); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .updater(new NoOp()) @@ -98,7 +97,7 @@ public class AttentionLayerTest extends BaseDL4JTest { .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build()) .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.recurrent(nIn)) + .inputType(InputType.recurrent(nIn)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -145,7 +144,7 @@ public class AttentionLayerTest extends BaseDL4JTest { System.out.println("Starting test: " + name); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .updater(new NoOp()) @@ -159,7 +158,7 @@ public class AttentionLayerTest extends BaseDL4JTest { .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build()) .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) 
.lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.recurrent(nIn)) + .inputType(InputType.recurrent(nIn)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -185,7 +184,7 @@ public class AttentionLayerTest extends BaseDL4JTest { for (boolean inputMask : new boolean[]{false, true}) { for (boolean projectInput : new boolean[]{false, true}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .updater(new NoOp()) @@ -199,7 +198,7 @@ public class AttentionLayerTest extends BaseDL4JTest { .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build()) .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.recurrent(nIn)) + .inputType(InputType.recurrent(nIn)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -240,7 +239,7 @@ public class AttentionLayerTest extends BaseDL4JTest { int nOut = 5; int layerSize = 8; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.IDENTITY) .updater(new NoOp()) @@ -251,7 +250,7 @@ public class AttentionLayerTest extends BaseDL4JTest { .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build()) .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.recurrent(nIn)) + .inputType(InputType.recurrent(nIn)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -303,7 +302,7 @@ public class AttentionLayerTest extends BaseDL4JTest { System.out.println("Starting test: " + name); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.IDENTITY) .updater(new NoOp()) @@ -314,7 +313,7 @@ public class AttentionLayerTest extends BaseDL4JTest { .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build()) .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.recurrent(nIn)) + .inputType(InputType.recurrent(nIn)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -361,7 +360,7 @@ public class AttentionLayerTest extends BaseDL4JTest { System.out.println("Starting test: " + name); - ComputationGraphConfiguration graph = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration graph = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .updater(new NoOp()) @@ -425,7 +424,7 @@ public class AttentionLayerTest extends BaseDL4JTest { System.out.println("Starting test: " + name); - ComputationGraphConfiguration graph = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration graph = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .updater(new NoOp()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java index f45861f57..5e6ed72bd 100644 --- 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java @@ -25,8 +25,8 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -74,17 +74,17 @@ public class BNGradientCheckTest extends BaseDL4JTest { for (boolean useLogStd : new boolean[]{true, false}) { - MultiLayerConfiguration.Builder builder = - new NeuralNetConfiguration.Builder().updater(new NoOp()) - .dataType(DataType.DOUBLE) - .seed(12345L) - .dist(new NormalDistribution(0, 1)).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3) - .activation(Activation.IDENTITY).build()) - .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).nOut(3).build()) - .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(3).nOut(3).build()); + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder().updater(new NoOp()) + .dataType(DataType.DOUBLE) + .seed(12345L) + .dist(new NormalDistribution(0, 1)).list() + .layer(0, new DenseLayer.Builder().nIn(4).nOut(3) + .activation(Activation.IDENTITY).build()) + .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).nOut(3).build()) + .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build()) + .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(3).nOut(3).build()); MultiLayerNetwork mln = new MultiLayerNetwork(builder.build()); mln.init(); @@ -119,7 +119,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { } for (boolean useLogStd : new boolean[]{true, false}) { - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()).seed(12345L) .dist(new NormalDistribution(0, 2)).list() @@ -129,7 +129,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build()) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(hw, hw, depth)); + .inputType(InputType.convolutional(hw, hw, depth)); MultiLayerNetwork mln = new MultiLayerNetwork(builder.build()); mln.init(); @@ -188,7 +188,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .l2(l2vals[j]) 
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) @@ -203,9 +203,9 @@ public class BNGradientCheckTest extends BaseDL4JTest { .layer(4, new ActivationLayer.Builder().activation(afn).build()) .layer(5, new OutputLayer.Builder(lf).activation(outputActivation).nOut(nOut) .build()) - .setInputType(InputType.convolutional(hw, hw, depth)); + .inputType(InputType.convolutional(hw, hw, depth)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); @@ -294,23 +294,23 @@ public class BNGradientCheckTest extends BaseDL4JTest { LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; - MultiLayerConfiguration.Builder builder = - new NeuralNetConfiguration.Builder() - .dataType(DataType.DOUBLE) - .l2(l2vals[j]) - .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) - .updater(new NoOp()) - .dist(new UniformDistribution(-2, 2)).seed(12345L).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(4) - .activation(afn).build()) - .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).build()) - .layer(2, new DenseLayer.Builder().nIn(4).nOut(4).build()) - .layer(3, new BatchNormalization.Builder().useLogStd(useLogStd).build()) - .layer(4, new OutputLayer.Builder(lf) - .activation(outputActivation).nOut(nOut) - .build()); + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .l2(l2vals[j]) + .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) + .updater(new NoOp()) + .dist(new UniformDistribution(-2, 2)).seed(12345L).list() + .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(4) + .activation(afn).build()) + .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).build()) + .layer(2, new DenseLayer.Builder().nIn(4).nOut(4).build()) + .layer(3, new BatchNormalization.Builder().useLogStd(useLogStd).build()) + .layer(4, new OutputLayer.Builder(lf) + .activation(outputActivation).nOut(nOut) + .build()); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); @@ -370,7 +370,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { INDArray labels = ds.getLabels(); for (boolean useLogStd : new boolean[]{true, false}) { - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .seed(12345L) .dist(new NormalDistribution(0, 1)).list() @@ -414,7 +414,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { } for (boolean useLogStd : new boolean[]{true, false}) { - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .seed(12345L) .dist(new NormalDistribution(0, 2)).list() @@ -424,7 +424,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build()) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(hw, hw, depth)); + .inputType(InputType.convolutional(hw, hw, depth)); 
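
(Not part of the patch itself - purely illustrative.) The hunks above and below all apply the same mechanical migration: MultiLayerConfiguration and new NeuralNetConfiguration.Builder() are replaced by NeuralNetConfiguration and its static builder(), setInputType(...) becomes inputType(...), and the JSON round-trip in the tests now goes through NeuralNetConfiguration.fromJson(...). The following is a minimal sketch of the post-patch usage, assuming only the API surface visible in these hunks; the class name BuilderMigrationSketch and the concrete layer sizes are made up for illustration and this snippet is not guaranteed to compile against any released DL4J version.

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.BatchNormalization;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class BuilderMigrationSketch {
    public static void main(String[] args) {
        // Post-patch style: NeuralNetConfiguration.builder() replaces
        // new NeuralNetConfiguration.Builder(), and inputType(...) replaces setInputType(...).
        NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
                .dataType(DataType.DOUBLE)
                .seed(12345L)
                .updater(new NoOp())
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3)
                        .activation(Activation.IDENTITY).build())
                .layer(1, new BatchNormalization.Builder().nOut(3).build())
                .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(3).nOut(3).build())
                .inputType(InputType.feedForward(4)) // was setInputType(...) before this patch
                .build();

        // The serialization round-trip used in the gradient-check tests now targets
        // NeuralNetConfiguration on both sides, per the hunks in this patch.
        NeuralNetConfiguration restored = NeuralNetConfiguration.fromJson(conf.toJson());

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
    }
}

The same entry-point change covers the ComputationGraph cases further down: graphBuilder() is still used to produce a ComputationGraphConfiguration, only the leading new NeuralNetConfiguration.Builder() becomes NeuralNetConfiguration.builder().
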
MultiLayerNetwork mln = new MultiLayerNetwork(builder.build()); mln.init(); @@ -457,7 +457,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { for (boolean useLogStd : new boolean[]{true, false}) { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).updater(new NoOp()) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(seed).updater(new NoOp()) .dataType(DataType.DOUBLE) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .setInputTypes(InputType.convolutional(height, width, channels)) @@ -526,7 +526,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) .updater(new NoOp()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java index b9f461775..0f474bb16 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java @@ -24,17 +24,14 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D; -import org.deeplearning4j.nn.modelimport.keras.KerasModelImport; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.util.Convolution1DUtils; -import org.deeplearning4j.util.ConvolutionUtils; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; @@ -45,8 +42,6 @@ import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.io.File; - import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -90,7 +85,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { } } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list() @@ -103,10 +98,10 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .build()) .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .setInputType(InputType.recurrent(convNIn, length)).build(); + .inputType(InputType.recurrent(convNIn, length)).build(); String json = conf.toJson(); - MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration c2 = 
NeuralNetConfiguration.fromJson(json); assertEquals(conf, c2); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -170,7 +165,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { } } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list() @@ -183,10 +178,10 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .build()) .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .setInputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); + .inputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); String json = conf.toJson(); - MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); assertEquals(conf, c2); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -251,7 +246,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { } } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list() @@ -267,10 +262,10 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .stride(stride).padding(padding).pnorm(pnorm).build()) .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .setInputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); + .inputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); String json = conf.toJson(); - MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); assertEquals(conf, c2); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -330,7 +325,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { } } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list() @@ -344,10 +339,10 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .stride(stride).padding(padding).pnorm(pnorm).build()) .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .setInputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); + .inputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); String json = conf.toJson(); - MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); assertEquals(conf, c2); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -393,7 +388,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { log.info("Starting test: " + s); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .activation(Activation.TANH) @@ -413,7 +408,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .layer(new 
GlobalPoolingLayer(PoolingType.AVG)) .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .setInputType(InputType.recurrent(convNIn, length)).build(); + .inputType(InputType.recurrent(convNIn, length)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -481,7 +476,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { log.info("Starting test: " + s); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .activation(Activation.TANH) @@ -501,7 +496,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .build()) .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .setInputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); + .inputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java index 1f4a1ceec..ba60ca557 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java @@ -24,7 +24,6 @@ import lombok.extern.java.Log; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -35,7 +34,6 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; @@ -112,7 +110,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % finalNOut}, 1.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) .dist(new NormalDistribution(0, 1)) @@ -131,10 +129,10 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .inputPreProcessor(2, new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, convNOut2, df == Convolution3D.DataFormat.NCDHW)) - .setInputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build(); + .inputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build(); String json = conf.toJson(); - MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); assertEquals(conf, c2); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -215,7 +213,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % finalNOut}, 1.0); } - 
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) .dist(new NormalDistribution(0, 1)) @@ -235,10 +233,10 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .inputPreProcessor(3, new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, convNOut2, true)) - .setInputType(InputType.convolutional3D(depth, height, width, convNIn)).build(); + .inputType(InputType.convolutional3D(depth, height, width, convNIn)).build(); String json = conf.toJson(); - MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); assertEquals(conf, c2); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -310,7 +308,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % finalNOut}, 1.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .weightInit(WeightInit.XAVIER) @@ -327,10 +325,10 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .activation(Activation.SOFTMAX).nOut(finalNOut).build()) .inputPreProcessor(2, new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth,convNOut, df)) - .setInputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build(); + .inputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build(); String json = conf.toJson(); - MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); assertEquals(conf, c2); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -397,7 +395,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % finalNOut}, 1.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) .dist(new NormalDistribution(0, 1)) @@ -414,10 +412,10 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .inputPreProcessor(2, new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, convNOut, true)) - .setInputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build(); + .inputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build(); String json = conf.toJson(); - MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); assertEquals(conf, c2); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -493,7 +491,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % finalNOut}, 1.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) .dist(new NormalDistribution(0, 1)) @@ -513,10 +511,10 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .inputPreProcessor(3, new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, convNOut2, true)) - .setInputType(InputType.convolutional3D(depth, height, width, convNIn)).build(); + .inputType(InputType.convolutional3D(depth, 
height, width, convNIn)).build(); String json = conf.toJson(); - MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); assertEquals(conf, c2); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -592,7 +590,7 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{j, j % finalNOut}, 1.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .weightInit(new NormalDistribution(0, 0.1)) @@ -607,10 +605,10 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .build()) .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .setInputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build(); + .inputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build(); String json = conf.toJson(); - MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); assertEquals(conf, c2); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java index b737fcf79..bee788e55 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java @@ -26,8 +26,8 @@ import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -100,15 +100,15 @@ public class CNNGradientCheckTest extends BaseDL4JTest { LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build()) - .setInputType(InputType.convolutionalFlat(1, 4, 1)); + .inputType(InputType.convolutionalFlat(1, 4, 1)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); @@ -186,7 +186,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { double l2 = l2vals[i]; double l1 = l1vals[i]; - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder 
= NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i]) .optimizationAlgo( @@ -198,9 +198,9 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3) .weightInit(WeightInit.XAVIER).updater(new NoOp()).build()) - .setInputType(InputType.convolutionalFlat(1, 4, 1)); + .inputType(InputType.convolutionalFlat(1, 4, 1)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); @@ -269,8 +269,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % nOut}, 1.0); } - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .dist(new NormalDistribution(0, 1)) @@ -281,7 +281,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(2 * 2 * 4) .nOut(nOut).build()) - .setInputType(InputType.convolutionalFlat(height, width, inputDepth)) + .inputType(InputType.convolutionalFlat(height, width, inputDepth)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -334,8 +334,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % nOut}, 1.0); } - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()).weightInit(new NormalDistribution(0, 1)) .list() @@ -349,7 +349,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nOut(nOut).build()) - .setInputType(InputType.convolutional(height, width, inputDepth, format)) + .inputType(InputType.convolutional(height, width, inputDepth, format)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -403,8 +403,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest { INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut); - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .dist(new NormalDistribution(0, 1)) @@ -416,7 +416,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(8 * 8 * 3) .nOut(4).build()) - .setInputType(InputType.convolutional(height, width, inputDepth, format)) + .inputType(InputType.convolutional(height, width, inputDepth, format)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -472,8 +472,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % nOut}, 1.0); } - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0, 1)) .list().layer(0, @@ -488,7 +488,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3 
* 3 * 3) .nOut(4).build()) - .setInputType(InputType.convolutional(height, width, inputDepth, format)) + .inputType(InputType.convolutional(height, width, inputDepth, format)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -546,8 +546,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % nOut}, 1.0); } - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0, 1)) .list().layer(0, @@ -562,7 +562,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(2 * 2 * 2) .nOut(4).build()) - .setInputType(InputType.convolutional(height, width, inputDepth, format)) + .inputType(InputType.convolutional(height, width, inputDepth, format)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -606,7 +606,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new NoOp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new NoOp()) .dataType(DataType.DOUBLE) .activation(afn) .list() @@ -623,10 +623,9 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(2 * 2 * 2).nOut(nOut) .build()) - .setInputType(InputType.convolutional(height, width, inputDepth, format)).build(); + .inputType(InputType.convolutional(height, width, inputDepth, format)).build(); - assertEquals(ConvolutionMode.Truncate, - ((ConvolutionLayer) conf.getConf(0).getLayer()).getConvolutionMode()); + assertEquals(ConvolutionMode.Truncate, conf.getConvolutionMode()); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -673,7 +672,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % nOut}, 1.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new NoOp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new NoOp()) .dataType(DataType.DOUBLE) .activation(afn) .list() @@ -689,10 +688,9 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(2 * 2 * 2).nOut(nOut) .build()) - .setInputType(InputType.convolutional(height, width, inputDepth, format)).build(); + .inputType(InputType.convolutional(height, width, inputDepth, format)).build(); - assertEquals(ConvolutionMode.Truncate, - ((ConvolutionLayer) conf.getConf(0).getLayer()).getConvolutionMode()); + assertEquals(ConvolutionMode.Truncate,conf.getConvolutionMode()); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -744,7 +742,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .updater(new NoOp()) .activation(Activation.SIGMOID).convolutionMode(Same).list() @@ -760,7 +758,7 
@@ public class CNNGradientCheckTest extends BaseDL4JTest { .stride(1, 1).padding(0, 0).build()) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(height, width, inputDepth, format)).build(); + .inputType(InputType.convolutional(height, width, inputDepth, format)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -810,14 +808,14 @@ public class CNNGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % nOut}, 1.0); } - Layer convLayer = new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k).dataFormat(format) + LayerConfiguration convLayer = new ConvolutionLayer.Builder().name("layer 0").kernelSize(k, k).dataFormat(format) .stride(stride, stride).padding(0, 0).nIn(inputDepth).nOut(2).build(); - Layer poolLayer = new SubsamplingLayer.Builder() + LayerConfiguration poolLayer = new SubsamplingLayer.Builder() .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(k, k).dataFormat(format) .stride(stride, stride).padding(0, 0).build(); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .updater(new NoOp()) .activation(Activation.TANH).convolutionMode(Same).list() @@ -825,7 +823,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer(1, convFirst ? poolLayer : convLayer) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(height, width, inputDepth, format)) + .inputType(InputType.convolutional(height, width, inputDepth, format)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -883,8 +881,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest { INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut); - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0, 1)).list() .layer(0, new ConvolutionLayer.Builder(kernel, stride, padding).dataFormat(format) @@ -894,7 +892,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { padding).nIn(3).nOut(3).dataFormat(format).build())//output: (6-2+0)/1+1 = 5 .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(4).build()) - .setInputType(InputType.convolutional(height, width, inputDepth, format)) + .inputType(InputType.convolutional(height, width, inputDepth, format)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -971,7 +969,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{j, j % nOut}, 1.0); } - NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .updater(new NoOp()) .activation(act) @@ -981,11 +979,11 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .stride(s, s).dataFormat(format) .dilation(d, d) .convolutionMode(cm) - .nIn(inputDepth).nOut(nOut).build()); + .nIn(inputDepth).nOut(nOut).build()) - MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(new 
OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(h, w, inputDepth, format)).build(); + .inputType(InputType.convolutional(h, w, inputDepth, format)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -1043,7 +1041,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % nOut}, 1.0); } - NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .updater(new NoOp()) .activation(Activation.TANH) @@ -1054,11 +1052,11 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .stride(s, s) .dilation(d, d) .depthMultiplier(3).dataFormat(format) - .nIn(inputDepth).nOut(2).build()); + .nIn(inputDepth).nOut(2).build()) - MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(h, w, inputDepth, format)).build(); + .inputType(InputType.convolutional(h, w, inputDepth, format)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -1116,7 +1114,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % nOut}, 1.0); } - NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration.NeuralNetConfigurationBuilder b = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .updater(new NoOp()) .activation(Activation.TANH).convolutionMode(cm).list() @@ -1140,9 +1138,9 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .build()); } - MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + NeuralNetConfiguration conf = (NeuralNetConfiguration) b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(h, w, inputDepth, format)).build(); + .inputType(InputType.convolutional(h, w, inputDepth, format)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -1190,8 +1188,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest { INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut); - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) @@ -1208,7 +1206,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .build()) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(height, width, inputDepth, format)) + .inputType(InputType.convolutional(height, width, inputDepth, format)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -1277,7 +1275,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % nOut}, 1.0); } - NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration.NeuralNetConfigurationBuilder b = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) 
.updater(new NoOp()) .activation(Activation.TANH) @@ -1293,9 +1291,9 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .depthMultiplier(depthMultiplier) .nIn(nIn).build()); // nOut = nIn * depthMultiplier - MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + NeuralNetConfiguration conf = (NeuralNetConfiguration) b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(height, width, nIn, format)).build(); + .inputType(InputType.convolutional(height, width, nIn, format)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java index c0a6cad8e..8d9caef52 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java @@ -24,7 +24,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -44,8 +43,6 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; -import java.util.Random; - ////@Ignore public class CapsnetGradientCheckTest extends BaseDL4JTest { @@ -80,12 +77,11 @@ public class CapsnetGradientCheckTest extends BaseDL4JTest { labels.putScalar(new int[]{i, i % capsule}, 1.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .seed(123) .updater(new NoOp()) - .weightInit(new WeightInitDistribution(new UniformDistribution(-6, 6))) - .list() + .dist(new UniformDistribution(-6, 6)) .layer(new PrimaryCapsules.Builder(primaryCapsDim, primarpCapsChannel) .kernelSize(3, 3) .stride(2, 2) @@ -94,7 +90,7 @@ public class CapsnetGradientCheckTest extends BaseDL4JTest { .layer(new CapsuleStrengthLayer.Builder().build()) .layer(new ActivationLayer.Builder(new ActivationSoftmax()).build()) .layer(new LossLayer.Builder(new LossNegativeLogLikelihood()).build()) - .setInputType(InputType.convolutional(height, width, inputDepth)) + .inputType(InputType.convolutional(height, width, inputDepth)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/DropoutGradientCheck.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/DropoutGradientCheck.java index 9aafd297c..5c124dfa0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/DropoutGradientCheck.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/DropoutGradientCheck.java @@ -25,7 +25,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.ConvolutionMode; -import 
org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.dropout.*; @@ -92,7 +91,7 @@ public class DropoutGradientCheck extends BaseDL4JTest { continue; } - NeuralNetConfiguration.ListBuilder builder = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0,1)) .convolutionMode(ConvolutionMode.Same) @@ -104,18 +103,18 @@ public class DropoutGradientCheck extends BaseDL4JTest { if(cnn){ builder.layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(2,2).nOut(2).build()); builder.layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(2,2).nOut(2).build()); - builder.setInputType(InputType.convolutional(6,6,2)); + builder.inputType(InputType.convolutional(6,6,2)); } else { builder.layer(new DenseLayer.Builder().nOut(3).build()); builder.layer(new DenseLayer.Builder().nOut(3).build()); - builder.setInputType(InputType.feedForward(6)); + builder.inputType(InputType.feedForward(6)); } builder.layer(new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunction.MCXENT).build()); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); //Remove spatial dropout from output layer - can't be used for 2d input if(i == 4){ - conf.getConf(2).getLayer().setIDropout(null); + conf.getFlattenedLayerConfigurations().get(2).setIDropout(null); } MultiLayerNetwork mln = new MultiLayerNetwork(conf); @@ -149,7 +148,7 @@ public class DropoutGradientCheck extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); int mb = 3; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0,1)) .convolutionMode(ConvolutionMode.Same) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java index 36574096d..18d430044 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java @@ -24,7 +24,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -72,7 +71,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest { for (int miniBatchSize : minibatchSizes) { for (PoolingType pt : poolingTypes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() @@ -127,7 +126,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest { for (int miniBatchSize : minibatchSizes) { for (PoolingType pt : poolingTypes) { - 
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() @@ -138,7 +137,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest { .layer(1, new GlobalPoolingLayer.Builder().poolingType(pt).build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(inputH, inputW, inputDepth, nchw ? CNN2DFormat.NCHW : CNN2DFormat.NHWC)).build(); + .inputType(InputType.convolutional(inputH, inputW, inputDepth, nchw ? CNN2DFormat.NCHW : CNN2DFormat.NHWC)).build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); @@ -185,7 +184,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest { for (PoolingType pt : poolingTypes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() @@ -259,7 +258,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest { stride = new int[] {inputH, 1}; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .dist(new NormalDistribution(0, 1.0)).convolutionMode(ConvolutionMode.Same) @@ -270,7 +269,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest { .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(inputH, inputW, inputDepth)).build(); + .inputType(InputType.convolutional(inputH, inputW, inputDepth)).build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java index 553477bd5..90f927d66 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTests.java @@ -26,7 +26,6 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -74,7 +73,7 @@ public class GradientCheckTests extends BaseDL4JTest { public void testMinibatchApplication() { IrisDataSetIterator iter = new IrisDataSetIterator(30, 150); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().miniBatch(false) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().miniBatch(false) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new NoOp()) .list() @@ -164,7 +163,7 @@ public class GradientCheckTests extends BaseDL4JTest { LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; - 
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .seed(12345L) @@ -253,8 +252,8 @@ public class GradientCheckTests extends BaseDL4JTest { double l2 = l2vals[k]; double l1 = l1vals[k]; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().l2(l2).l1(l1) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().l2(l2).l1(l1) .dataType(DataType.DOUBLE) .l2Bias(biasL2[k]).l1Bias(biasL1[k]) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) @@ -325,7 +324,7 @@ public class GradientCheckTests extends BaseDL4JTest { labels.putScalar(new int[] {i, r.nextInt(3)}, 1.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.1) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l2(0.2).l1(0.1) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345L) .list().layer(new EmbeddingLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) @@ -363,7 +362,7 @@ public class GradientCheckTests extends BaseDL4JTest { labels.putScalar(new int[] {i, r.nextInt(3)}, 1.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.1) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l2(0.2).l1(0.1) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345L) .list().layer(0, @@ -429,8 +428,8 @@ public class GradientCheckTests extends BaseDL4JTest { double l1 = l1vals[k]; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .l2(l2).l1(l1) @@ -491,7 +490,7 @@ public class GradientCheckTests extends BaseDL4JTest { for(Activation a : new Activation[]{Activation.IDENTITY, Activation.TANH}) { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .seed(12345L) @@ -561,7 +560,7 @@ public class GradientCheckTests extends BaseDL4JTest { for (boolean maskArray : new boolean[]{false, true}) { for (int inputRank : new int[]{2, 3}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .seed(12345) .updater(new NoOp()) @@ -672,8 +671,8 @@ public class GradientCheckTests extends BaseDL4JTest { double l2 = l2vals[k]; double l1 = l1vals[k]; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().l2(l2).l1(l1) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().l2(l2).l1(l1) .dataType(DataType.DOUBLE) .l2Bias(biasL2[k]).l1Bias(biasL1[k]) .weightDecay(wdVals[k]).weightDecayBias(wdBias[k]) @@ -736,7 +735,7 @@ public class GradientCheckTests extends BaseDL4JTest { LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .seed(12345L) diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java index 7718078a6..c121e8b14 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java @@ -36,9 +36,6 @@ import org.deeplearning4j.nn.conf.graph.rnn.ReverseTimeSeriesVertex; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn; -import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; -import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; -import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; @@ -52,7 +49,6 @@ import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.util.Arrays; import java.util.Map; import java.util.Random; @@ -74,7 +70,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { @Test public void testBasicIris() { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)).updater(new NoOp()) @@ -120,7 +116,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { @Test public void testBasicIrisWithMerging() { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)).updater(new NoOp()) @@ -177,7 +173,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { for (ElementWiseVertex.Op op : ops) { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -235,7 +231,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { for (ElementWiseVertex.Op op : ops) { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -295,7 +291,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { for(boolean firstSmaller : new boolean[]{false, true}) { for (ElementWiseVertex.Op op : ops) { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + 
ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .updater(new NoOp()) .dataType(DataType.DOUBLE) .activation(Activation.TANH) @@ -343,7 +339,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { String msg = "testCnnDepthMerge - " + format; Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 0.1)) @@ -398,7 +394,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { int outSize = 3; Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new UniformDistribution(0.2, 0.6)) @@ -457,7 +453,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { int batchSize = 2; int timeSeriesLength = 4; int inLength = 3; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(1234) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(1234) .dataType(DataType.DOUBLE) .weightInit(new NormalDistribution(0, 1)) .updater(new NoOp()).graphBuilder().addInputs("input").setOutputs("out") @@ -493,7 +489,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { public void testLSTMWithLastTimeStepVertex() { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -545,7 +541,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { int timeSeriesLength = 4; Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -595,7 +591,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { int timeSeriesLength = 4; Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -654,7 +650,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { public void testMultipleInputsLayer() { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -697,7 +693,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { @Test public void testMultipleOutputsLayer() { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + 
ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -737,7 +733,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { @Test public void testMultipleOutputsMergeVertex() { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -786,7 +782,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { int inW = 7; Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -836,7 +832,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { public void testBasicIrisTripletStackingL2Loss() { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -910,7 +906,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { for (boolean train : trainFirst) { for (double lambda : new double[] {0.0, 0.5, 2.0}) { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new GaussianDistribution(0, 1)) @@ -975,7 +971,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { for (boolean train : trainFirst) { for (double lambda : new double[] {0.0, 0.5, 2.0}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() @@ -986,7 +982,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { .alpha(1.0).lambda(lambda).gradientCheck(true) .activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(inputH, inputW, inputDepth)).build(); + .inputType(InputType.convolutional(inputH, inputW, inputDepth)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -1029,7 +1025,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { @Test public void testBasicL2() { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -1081,7 +1077,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { int layerSizes = 2; Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new 
NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -1136,7 +1132,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { public void testBasicStackUnstackDebug() { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -1196,7 +1192,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { int layerSizes = 2; Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -1259,7 +1255,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { public void testBasicTwoOutputs() { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -1320,7 +1316,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { int[][] definitions = {null,new int[]{1}}; for(int[] definition : definitions) { log.info("Testing definition {}",definition); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .activation(Activation.TANH).updater(new NoOp()).graphBuilder() @@ -1368,7 +1364,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { int w = 4; int dIn = 2; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)) @@ -1420,7 +1416,7 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { labels.putScalar(new int[] {i, r.nextInt(3)}, 1.0); } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.1) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().l2(0.2).l1(0.1) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345L) .updater(new NoOp()).graphBuilder().addInputs("in") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java index 4efd20ee7..689720529 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsMasking.java @@ -24,7 +24,6 @@ import lombok.val; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -117,7 +116,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest { maskArr.putScalar(new int[] {0, j}, mask[i][j] ? 1.0 : 0.0); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L) .dataType(DataType.DOUBLE) .updater(new NoOp()) .list() @@ -158,7 +157,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest { int testNum = 0; for (INDArray mask : masks) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new NoOp()) .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() @@ -238,7 +237,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest { Activation a = act[i]; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0, 1)).seed(12345) .list() @@ -332,7 +331,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest { Activation a = act[i]; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0, 1)).seed(12345) .list() @@ -341,7 +340,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest { .layer(1, new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf) .activation(a).build()) .validateOutputLayerConfig(false) - .setInputType(InputType.recurrent(nIn,tsLength, RNNFormat.NCW)) + .inputType(InputType.recurrent(nIn,tsLength, RNNFormat.NCW)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -365,7 +364,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest { //Check the equivalent compgraph: Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration cg = new NeuralNetConfiguration.Builder().updater(new NoOp()) + ComputationGraphConfiguration cg = NeuralNetConfiguration.builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0, 2)).seed(12345) .graphBuilder().addInputs("in") @@ -397,7 +396,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest { int mb = 4; int tsLength = 5; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .weightInit(new NormalDistribution(0,2)) .updater(new NoOp()) @@ -405,7 +404,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest { .layer(new LSTM.Builder().nIn(3).nOut(3).build()) .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build()) .layer(new OutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.recurrent(3)) + 
.inputType(InputType.recurrent(3)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -452,7 +451,7 @@ public class GradientCheckTestsMasking extends BaseDL4JTest { int mb = 10; int tsLength = 5; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .weightInit(new NormalDistribution(0,2)) .updater(new NoOp()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java index 87ea20cf5..18769905c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LRNGradientCheckTests.java @@ -22,8 +22,8 @@ package org.deeplearning4j.gradientcheck; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -68,7 +68,7 @@ public class LRNGradientCheckTests extends BaseDL4JTest { labels.putScalar(i, r.nextInt(nOut), 1.0); } - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .seed(12345L) .dist(new NormalDistribution(0, 2)).list() @@ -77,7 +77,7 @@ public class LRNGradientCheckTests extends BaseDL4JTest { .layer(1, new LocalResponseNormalization.Builder().build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(hw, hw, depth)); + .inputType(InputType.convolutional(hw, hw, depth)); MultiLayerNetwork mln = new MultiLayerNetwork(builder.build()); mln.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java index a2c7d7039..421b6a63d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LSTMGradientCheckTests.java @@ -22,8 +22,8 @@ package org.deeplearning4j.gradientcheck; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; @@ -70,8 +70,8 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { for (boolean graves : gravesLSTM) { - Layer l0; - Layer l1; + LayerConfiguration l0; + LayerConfiguration l1; if (graves) { l0 = new 
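Alongside the builder rename, the masking and LRN hunks above change the input-type hint from .setInputType(...) to .inputType(...). A small sketch of the renamed call on the same LSTM / global-pooling stack used in that test; method availability is assumed from the patch.

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;

public class InputTypeRenameSketch {
    public static void main(String[] args) {
        NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
                .layer(new LSTM.Builder().nIn(3).nOut(3).build())
                .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build())
                .layer(new OutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).build())
                // formerly .setInputType(InputType.recurrent(3)); the preprocessors it infers are unchanged
                .inputType(InputType.recurrent(3))
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
    }
}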
GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.SIGMOID) .dist(new NormalDistribution(0, 1.0)) @@ -88,8 +88,8 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { .updater(new NoOp()).build(); } - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(12345L) .dataType(DataType.DOUBLE) .list() .layer(0, l0).layer(1, @@ -179,11 +179,11 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { double l1 = l1vals[i]; Activation afn = activFns[i]; - NeuralNetConfiguration.Builder conf = - new NeuralNetConfiguration.Builder() - .dataType(DataType.DOUBLE) - .seed(12345L) - .dist(new NormalDistribution(0, 1)).updater(new NoOp()); + NeuralNetConfiguration.NeuralNetConfigurationBuilder conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .seed(12345L) + .dist(new NormalDistribution(0, 1)).updater(new NoOp()); if (l1 > 0.0) conf.l1(l1); @@ -194,17 +194,17 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { if (biasL1[i] > 0) conf.l1Bias(biasL1[i]); - Layer layer; + LayerConfiguration layer; if (graves) { layer = new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(afn).build(); } else { layer = new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(afn).build(); } - NeuralNetConfiguration.ListBuilder conf2 = conf.list().layer(0, layer) + NeuralNetConfiguration.NeuralNetConfigurationBuilder conf2 = (NeuralNetConfigurationBuilder) conf + .layer(0, layer) .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation) - .nIn(layerSize).nOut(nOut).build()) - ; + .nIn(layerSize).nOut(nOut).build()); MultiLayerNetwork mln = new MultiLayerNetwork(conf2.build()); mln.init(); @@ -249,14 +249,14 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { INDArray labels = TestUtils.randomOneHotTimeSeries(miniBatchSize[i], nOut, timeSeriesLength[i]); - Layer layer; + LayerConfiguration layer; if (graves) { layer = new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(); } else { layer = new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L) .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()).list().layer(0, layer) @@ -309,8 +309,8 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { double l2 = l2vals[k]; double l1 = l1vals[k]; - NeuralNetConfiguration.Builder conf = - new NeuralNetConfiguration.Builder(); + NeuralNetConfiguration.NeuralNetConfigurationBuilder conf = + NeuralNetConfiguration.builder(); if (l1 > 0.0) conf.l1(l1); if (l2 > 0.0) @@ -320,10 +320,10 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { if (biasL1[k] > 0) conf.l1Bias(biasL1[k]); - MultiLayerConfiguration mlc = conf.seed(12345L) + NeuralNetConfiguration mlc = (NeuralNetConfiguration) conf.seed(12345L) .dataType(DataType.DOUBLE) .updater(new NoOp()) - .list().layer(0, + .layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize) .weightInit(new NormalDistribution(0, 1)) .activation(afn) @@ -380,7 +380,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { } } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L) .dataType(DataType.DOUBLE) .list() .layer(0, new 
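The LSTM gradient-check hunks retype the layer variables from Layer to LayerConfiguration and name the builder type explicitly as NeuralNetConfiguration.NeuralNetConfigurationBuilder. A sketch of that pattern follows; the import location of LayerConfiguration is an assumption, since the patch does not show it.

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;  // assumed location; the patch does not show this import
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;

public class LayerConfigurationSketch {

    static NeuralNetConfiguration build(boolean graves) {
        // Layer variables are now typed as LayerConfiguration rather than Layer.
        LayerConfiguration recurrent = graves
                ? new GravesLSTM.Builder().nIn(4).nOut(5).activation(Activation.TANH).build()
                : new LSTM.Builder().nIn(4).nOut(5).activation(Activation.TANH).build();

        // The builder's concrete type is spelled out as the generated NeuralNetConfigurationBuilder.
        NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder()
                .seed(12345L)
                .dataType(DataType.DOUBLE);

        // Layers are added straight on the builder (replacing the old .list() step); the converted
        // LSTM tests cast the chained result back to NeuralNetConfigurationBuilder before build().
        NeuralNetConfigurationBuilder withLayers = (NeuralNetConfigurationBuilder) builder
                .layer(0, recurrent)
                .layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(5).nOut(3).build());

        return withLayers.build();
    }
}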
GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize) @@ -429,7 +429,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()).seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new NoOp()).seed(12345) .dataType(DataType.DOUBLE) .dist(new UniformDistribution(-2, 2)).list() .layer(0, new ConvolutionLayer.Builder(3, 3).nIn(2).nOut(3).stride(1, 1) @@ -440,7 +440,7 @@ public class LSTMGradientCheckTests extends BaseDL4JTest { .layer(3, new GravesLSTM.Builder().nIn(4).nOut(3).activation(Activation.TANH).build()) .layer(4, new RnnOutputLayer.Builder().lossFunction(LossFunction.MCXENT).nIn(3).nOut(nClasses) .activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(6, 6, 2)).build(); + .inputType(InputType.convolutional(6, 6, 2)).build(); //Here: ConvolutionLayerSetup in config builder doesn't know that we are expecting time series input, not standard FF input -> override it here conf.getInputPreProcessors().put(0, new RnnToCnnPreProcessor(6, 6, 2)); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java index 74b142845..0cf7ebd1b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java @@ -26,7 +26,6 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.gradientcheck.sdlosscustom.SDLossMAE; import org.deeplearning4j.gradientcheck.sdlosscustom.SDLossMSE; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; @@ -183,7 +182,7 @@ public class LossFunctionGradientCheck extends BaseDL4JTest { + minibatchSizes[j]; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345) .updater(new NoOp()) @@ -347,7 +346,7 @@ public class LossFunctionGradientCheck extends BaseDL4JTest { } Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345) .updater(new NoOp()) @@ -362,7 +361,7 @@ public class LossFunctionGradientCheck extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertSame(((LossLayer) net.getLayer(1).conf().getLayer()).getLossFn().getClass(), lossFunctions[i] + assertSame(((LossLayer) net.getLayer(1).getLayerConfiguration()).getLossFn().getClass(), lossFunctions[i] .getClass()); INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, nOut[i], 12345); @@ -649,7 +648,7 @@ public class LossFunctionGradientCheck extends BaseDL4JTest { + minibatchSizes[j] + "; weights = " + w; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new 
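The LossFunctionGradientCheck hunk above also changes how a layer's configuration is read back: net.getLayer(i).conf().getLayer() becomes net.getLayer(i).getLayerConfiguration(). A short sketch of the new accessor, under the assumption that it returns the layer's LayerConfiguration as the converted assertion implies.

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LossLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.impl.LossMSE;

public class LayerConfigAccessorSketch {
    public static void main(String[] args) {
        NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
                .seed(12345)
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
                .layer(1, new LossLayer.Builder().lossFunction(new LossMSE())
                        .activation(Activation.IDENTITY).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        // Formerly: ((LossLayer) net.getLayer(1).conf().getLayer()).getLossFn()
        LossLayer lossLayer = (LossLayer) net.getLayer(1).getLayerConfiguration();
        System.out.println(lossLayer.getLossFn().getClass().getSimpleName());
    }
}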
NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345) .updater(new NoOp()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java index 477199be0..f47a4ee0e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java @@ -22,7 +22,6 @@ package org.deeplearning4j.gradientcheck; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -69,7 +68,7 @@ public class NoBiasGradientCheckTests extends BaseDL4JTest { for (boolean denseHasBias : new boolean[]{true, false}) { for (boolean outHasBias : new boolean[]{true, false}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .seed(12345L) @@ -140,7 +139,7 @@ public class NoBiasGradientCheckTests extends BaseDL4JTest { for (boolean rnnOutHasBias : new boolean[]{true, false}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .seed(12345L) @@ -201,7 +200,7 @@ public class NoBiasGradientCheckTests extends BaseDL4JTest { for (boolean embeddingHasBias : new boolean[]{true, false}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .seed(12345L) @@ -267,8 +266,8 @@ public class NoBiasGradientCheckTests extends BaseDL4JTest { for(boolean cnnHasBias : new boolean[]{true, false}) { - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0, 1)) .list() @@ -285,7 +284,7 @@ public class NoBiasGradientCheckTests extends BaseDL4JTest { .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nOut(4).build()) - .setInputType(InputType.convolutionalFlat(height, width, inputDepth)) + .inputType(InputType.convolutionalFlat(height, width, inputDepth)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java index 0928b52de..7556178b9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java @@ -23,7 +23,6 @@ package org.deeplearning4j.gradientcheck; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import 
org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.*; @@ -117,8 +116,8 @@ public class OutputLayerGradientChecks extends BaseDL4JTest { Activation oa = maskType == 2 ? Activation.SIGMOID : Activation.SOFTMAX; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(12345L) .dataType(DataType.DOUBLE) .updater(new NoOp()) .list() @@ -223,8 +222,8 @@ public class OutputLayerGradientChecks extends BaseDL4JTest { Activation oa = maskType == 3 ? Activation.SIGMOID : Activation.SOFTMAX; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(12345L) .dataType(DataType.DOUBLE) .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) @@ -370,8 +369,8 @@ public class OutputLayerGradientChecks extends BaseDL4JTest { Activation oa = maskType == 1 ? Activation.SOFTMAX : Activation.SIGMOID; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(12345L) .dataType(DataType.DOUBLE) .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java index 87a42e4e0..44e904d7e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java @@ -22,7 +22,6 @@ package org.deeplearning4j.gradientcheck; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -108,7 +107,7 @@ public class RnnGradientChecks extends BaseDL4JTest { System.out.println("Starting test: " + name); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .weightInit(WeightInit.XAVIER) @@ -187,7 +186,7 @@ public class RnnGradientChecks extends BaseDL4JTest { System.out.println("Starting test: " + name); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .weightInit(WeightInit.XAVIER) @@ -263,7 +262,7 @@ public class RnnGradientChecks extends BaseDL4JTest { System.out.println("Starting test: " + name); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .updater(new NoOp()) @@ -275,7 +274,7 @@ public class RnnGradientChecks extends BaseDL4JTest { new LSTM.Builder().nOut(layerSize).build())) .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - 
.setInputType(InputType.recurrent(nIn)) + .inputType(InputType.recurrent(nIn)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -329,7 +328,7 @@ public class RnnGradientChecks extends BaseDL4JTest { System.out.println("Starting test: " + name); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .updater(new NoOp()) @@ -339,7 +338,7 @@ public class RnnGradientChecks extends BaseDL4JTest { .layer(new TimeDistributed(new DenseLayer.Builder().nOut(layerSize).activation(Activation.SOFTMAX).build())) .layer(new RnnOutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.recurrent(nIn)) + .inputType(InputType.recurrent(nIn)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java index 670987c78..212bd29da 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java @@ -23,7 +23,6 @@ package org.deeplearning4j.gradientcheck; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -127,9 +126,9 @@ public class UtilLayerGradientChecks extends BaseDL4JTest { String name = "mb=" + minibatch + ", maskType=" + maskType + ", inputRank=" + inputRank; System.out.println("*** Starting test: " + name); - Layer l1; - Layer l2; - Layer l3; + LayerConfiguration l1; + LayerConfiguration l2; + LayerConfiguration l3; InputType it; switch (inputRank){ case 2: @@ -163,7 +162,7 @@ public class UtilLayerGradientChecks extends BaseDL4JTest { } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new NoOp()) .activation(Activation.TANH) .dataType(DataType.DOUBLE) @@ -173,7 +172,7 @@ public class UtilLayerGradientChecks extends BaseDL4JTest { .layer(new MaskLayer()) .layer(l2) .layer(l3) - .setInputType(it) + .inputType(it) .build(); @@ -197,10 +196,10 @@ public class UtilLayerGradientChecks extends BaseDL4JTest { for( int minibatch : new int[]{1,5}) { - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .seed(12345) - .updater(Updater.NONE) + .updater(Updater.NONE.getIUpdaterWithDefaultConfig()) .list() .layer(new DenseLayer.Builder().nIn(10).nOut(10) .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java index 40041885e..233836066 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java +++ 
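UtilLayerGradientChecks above shows one more mechanical substitution: the Updater enum constant is no longer passed to updater(...) directly but converted first via getIUpdaterWithDefaultConfig(). A sketch of the converted call; the equivalence of Updater.NONE to a no-op updater is assumed.

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class UpdaterMigrationSketch {
    public static void main(String[] args) {
        // Old: .updater(Updater.NONE) passed the enum constant.
        // New: the enum is resolved to its IUpdater instance first; NONE maps to a no-op updater.
        NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
                .seed(12345)
                .updater(Updater.NONE.getIUpdaterWithDefaultConfig())
                .list()
                .layer(new DenseLayer.Builder().nIn(10).nOut(10)
                        .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build())
                .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .nIn(10).nOut(10).activation(Activation.SOFTMAX).build())
                .build();
    }
}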
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java @@ -22,7 +22,6 @@ package org.deeplearning4j.gradientcheck; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -94,8 +93,8 @@ public class VaeGradientCheckTests extends BaseDL4JTest { } Activation afn = activFns[i]; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().l2(l2).l1(l1) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().l2(l2).l1(l1) .dataType(DataType.DOUBLE) .updater(new NoOp()) .l2Bias(biasL2[i]).l1Bias(biasL1[i]) @@ -170,7 +169,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest { Activation pzxAfn = pzxAfns[i]; Activation pxzAfn = pxzAfns[i]; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(l2) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l2(l2) .dataType(DataType.DOUBLE) .l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i]).updater(new NoOp()) .seed(12345L).weightInit(WeightInit.XAVIER).list() @@ -259,7 +258,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest { throw new RuntimeException(); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l2(0.2).l1(0.3) .dataType(DataType.DOUBLE) .updater(new NoOp()) .seed(12345L).dist(new NormalDistribution(0, 1)) @@ -303,7 +302,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest { for (int numSamples : new int[]{1, 2}) { INDArray features = Nd4j.rand(DataType.DOUBLE, minibatch, 4); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l2(0.2).l1(0.3) .dataType(DataType.DOUBLE) .updater(new NoOp()) .seed(12345L).weightInit(WeightInit.XAVIER).list() diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java index 9ae3e598a..1eb72b1bd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java @@ -109,7 +109,7 @@ public class YoloGradientCheckTests extends BaseDL4JTest { labels = yoloLabels(mb, c, h, w).permute(0,2,3,1); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .dataType(DataType.DOUBLE) .updater(new NoOp()) .activation(a) @@ -122,7 +122,7 @@ public class YoloGradientCheckTests extends BaseDL4JTest { .layer(new Yolo2OutputLayer.Builder() .boundingBoxPriors(bbPrior) .build()) - .setInputType(InputType.convolutional(h, w, depthIn, format)) + .inputType(InputType.convolutional(h, w, depthIn, format)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -227,7 +227,7 @@ public class YoloGradientCheckTests extends BaseDL4JTest { DataSetIterator iter = new RecordReaderDataSetIterator(rr,2,1,1,true); iter.setPreProcessor(new ImagePreProcessingScaler()); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = 
NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .convolutionMode(ConvolutionMode.Same) .updater(new NoOp()) @@ -240,7 +240,7 @@ public class YoloGradientCheckTests extends BaseDL4JTest { .layer(new Yolo2OutputLayer.Builder() .boundingBoxPriors(bbPriors) .build()) - .setInputType(InputType.convolutional(h,w,c)) + .inputType(InputType.convolutional(h,w,c)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java index 7862cb95f..6e0cbd770 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java @@ -57,7 +57,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { @Test public void testJSONBasic() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)).updater(new NoOp()) .graphBuilder().addInputs("input") @@ -79,7 +79,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { @Test public void testJSONBasic2() { ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("input") .addLayer("cnn1", @@ -115,7 +115,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { public void testJSONWithGraphNodes() { ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("input1", "input2") .addLayer("cnn1", @@ -149,7 +149,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { //Test no inputs for a layer: try { - new NeuralNetConfiguration.Builder().graphBuilder().addInputs("input1") + NeuralNetConfiguration.builder().graphBuilder().addInputs("input1") .addLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "input1") .addLayer("out", new OutputLayer.Builder().nIn(2).nOut(2).build()).setOutputs("out") .build(); @@ -161,7 +161,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { // Use appendLayer on first layer try { - new NeuralNetConfiguration.Builder().graphBuilder() + NeuralNetConfiguration.builder().graphBuilder() .appendLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build()) .addLayer("out", new OutputLayer.Builder().nIn(2).nOut(2).build()).setOutputs("out") .build(); @@ -173,7 +173,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { //Test no network inputs try { - new NeuralNetConfiguration.Builder().graphBuilder() + NeuralNetConfiguration.builder().graphBuilder() .addLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "input1") .addLayer("out", new OutputLayer.Builder().nIn(2).nOut(2).build(), "dense1") .setOutputs("out").build(); @@ -185,7 +185,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { //Test no network outputs try { - new NeuralNetConfiguration.Builder().graphBuilder().addInputs("input1") + 
NeuralNetConfiguration.builder().graphBuilder().addInputs("input1") .addLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "input1") .addLayer("out", new OutputLayer.Builder().nIn(2).nOut(2).build(), "dense1").build(); fail("No exception thrown for invalid configuration"); @@ -196,7 +196,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { //Test: invalid input try { - new NeuralNetConfiguration.Builder().graphBuilder().addInputs("input1") + NeuralNetConfiguration.builder().graphBuilder().addInputs("input1") .addLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "input1") .addLayer("out", new OutputLayer.Builder().nIn(2).nOut(2).build(), "thisDoesntExist") .setOutputs("out").build(); @@ -208,7 +208,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { //Test: graph with cycles try { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("input1") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("input1") .addLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "input1", "dense3") .addLayer("dense2", new DenseLayer.Builder().nIn(2).nOut(2).build(), "dense1") .addLayer("dense3", new DenseLayer.Builder().nIn(2).nOut(2).build(), "dense2") @@ -226,7 +226,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { //Test: input != inputType count mismatch try { - new NeuralNetConfiguration.Builder().graphBuilder().addInputs("input1", "input2") + NeuralNetConfiguration.builder().graphBuilder().addInputs("input1", "input2") .setInputTypes(new InputType.InputTypeRecurrent(10, 12)) .addLayer("cnn1", new ConvolutionLayer.Builder(2, 2).stride(2, 2).nIn(1).nOut(5) @@ -259,7 +259,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { // using runtime/reflection subtype mechanism in ComputationGraphConfiguration.fromJson() //Check a standard GraphVertex implementation, plus a static inner graph vertex - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addVertex("test", new TestGraphVertex(3, 7), "in") .addVertex("test2", new StaticInnerGraphVertex(4, 5), "in").setOutputs("test", "test2").build(); @@ -282,7 +282,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { @Test public void testOutputOrderDoesntChangeWhenCloning() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("out1", new OutputLayer.Builder().nIn(1).nOut(1).build(), "in") .addLayer("out2", new OutputLayer.Builder().nIn(1).nOut(1).build(), "in") .addLayer("out3", new OutputLayer.Builder().nIn(1).nOut(1).build(), "in") @@ -299,7 +299,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { @Test public void testAllowDisconnectedLayers() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("bidirectional", new Bidirectional(new LSTM.Builder().activation(Activation.TANH).nOut(10).build()), "in") @@ -321,7 +321,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { @Test public void 
testBidirectionalGraphSummary() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("bidirectional", new Bidirectional(new LSTM.Builder().activation(Activation.TANH).nOut(10).build()), "in") @@ -408,7 +408,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { if(nOut[i] == 1 && lossLayer) continue; //nOuts are not availabel in loss layer, can't expect it to detect this case try { - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java index 190a89746..3e43bfdbe 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java @@ -98,7 +98,7 @@ public class JsonTest extends BaseDL4JTest { for (int i = 0; i < lossFunctions.length; i++) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.ADAM).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(Updater.ADAM.getIUpdaterWithDefaultConfig()) .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build()) .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i]) .activation(outputActivationFn[i]).build()) @@ -107,8 +107,8 @@ public class JsonTest extends BaseDL4JTest { String json = conf.toJson(); String yaml = conf.toYaml(); - MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(json); - MultiLayerConfiguration fromYaml = MultiLayerConfiguration.fromYaml(yaml); + NeuralNetConfiguration fromJson = NeuralNetConfiguration.fromJson(json); + NeuralNetConfiguration fromYaml = NeuralNetConfiguration.fromYaml(yaml); assertEquals(conf, fromJson); assertEquals(conf, fromYaml); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java index a10a9a3c7..700b70a6b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java @@ -20,14 +20,35 @@ package org.deeplearning4j.nn.conf; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.util.Arrays; +import java.util.Properties; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import 
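With MultiLayerConfiguration removed, the JSON and YAML round trips in JsonTest and MultiLayerNeuralNetConfigurationTest now go through NeuralNetConfiguration.fromJson(...) and fromYaml(...). A compact sketch of the round trip, mirroring the converted testGlobalPoolingJson hunk; method availability is assumed from the patch rather than from an upstream release.

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class ConfigRoundTripSketch {
    public static void main(String[] args) {
        NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
                .seed(12345L)
                .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(5).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nOut(3).build())
                .inputType(InputType.convolutional(32, 32, 1))
                .build();

        // Both readers moved from MultiLayerConfiguration to NeuralNetConfiguration.
        String json = conf.toJson();
        String yaml = conf.toYaml();
        NeuralNetConfiguration fromJson = NeuralNetConfiguration.fromJson(json);
        NeuralNetConfiguration fromYaml = NeuralNetConfiguration.fromYaml(yaml);

        System.out.println(conf.equals(fromJson) && conf.equals(fromYaml));
    }
}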
org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer; +import org.deeplearning4j.nn.conf.layers.LossLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.conf.layers.PoolingType; +import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; +import org.deeplearning4j.nn.conf.layers.Upsampling2D; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.weightnoise.DropConnect; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -41,349 +62,349 @@ import org.nd4j.linalg.learning.config.Adam; import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.io.*; -import java.util.Arrays; -import java.util.Properties; - -import static org.junit.jupiter.api.Assertions.*; - @Slf4j public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { - @TempDir - public File testDir; + @TempDir + public File testDir; - @Test - public void testJson() throws Exception { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() - .layer(0, new DenseLayer.Builder().dist(new NormalDistribution(1, 1e-1)).build()) - .inputPreProcessor(0, new CnnToFeedForwardPreProcessor()).build(); + private static NeuralNetConfiguration getConf() { + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L) + .layer(0, new DenseLayer.Builder().nIn(2).nOut(2) + .dist(new NormalDistribution(0, 1)).build()) + .layer(1, new OutputLayer.Builder().nIn(2).nOut(1) + .activation(Activation.TANH) + .dist(new NormalDistribution(0, 1)).lossFunction(LossFunctions.LossFunction.MSE) + .build()) + .build(); + return conf; + } - String json = conf.toJson(); - MultiLayerConfiguration from = MultiLayerConfiguration.fromJson(json); - assertEquals(conf.getConf(0), from.getConf(0)); + @Test + public void testJson() throws Exception { + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .layer(0, new DenseLayer.Builder().dist(new NormalDistribution(1, 1e-1)).build()) + .inputPreProcessor(0, new CnnToFeedForwardPreProcessor()).build(); - Properties props = new Properties(); - props.put("json", json); - String key = props.getProperty("json"); - assertEquals(json, key); - File f = new File(testDir, "props"); - f.deleteOnExit(); - BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(f)); - props.store(bos, ""); - bos.flush(); - bos.close(); - BufferedInputStream bis = new BufferedInputStream(new FileInputStream(f)); - Properties props2 = new Properties(); - props2.load(bis); - bis.close(); - assertEquals(props2.getProperty("json"), props.getProperty("json")); - String json2 = props2.getProperty("json"); - MultiLayerConfiguration conf3 = MultiLayerConfiguration.fromJson(json2); - assertEquals(conf.getConf(0), conf3.getConf(0)); + String json = conf.toJson(); + NeuralNetConfiguration from = NeuralNetConfiguration.fromJson(json); + assertEquals(conf.getConf(0), from.getConf(0)); + Properties props = new Properties(); + props.put("json", json); + String key = props.getProperty("json"); + assertEquals(json, key); + File f = new File(testDir, "props"); + f.deleteOnExit(); + BufferedOutputStream bos = new 
BufferedOutputStream(new FileOutputStream(f)); + props.store(bos, ""); + bos.flush(); + bos.close(); + BufferedInputStream bis = new BufferedInputStream(new FileInputStream(f)); + Properties props2 = new Properties(); + props2.load(bis); + bis.close(); + assertEquals(props2.getProperty("json"), props.getProperty("json")); + String json2 = props2.getProperty("json"); + NeuralNetConfiguration conf3 = NeuralNetConfiguration.fromJson(json2); + assertEquals(conf.getConf(0), conf3.getConf(0)); + + } + + @Test + public void testConvnetJson() { + final int numRows = 76; + final int numColumns = 76; + int nChannels = 3; + int outputNum = 6; + int seed = 123; + + //setup the network + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) + .l1(1e-1).l2(2e-4).weightNoise(new DropConnect(0.5)).miniBatch(true) + .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) + .layer(0, + new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) + .activation(Activation.RELU).build()) + .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[]{2, 2}) + .build()) + .layer(2, + new ConvolutionLayer.Builder(3, 3).nOut(10).dropOut(0.5).weightInit(WeightInit.XAVIER) + .activation(Activation.RELU).build()) + .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[]{2, 2}) + .build()) + .layer(4, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) + .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) + .build()) + + .inputType(InputType.convolutional(numRows, numColumns, nChannels)); + + NeuralNetConfiguration conf = builder.build(); + String json = conf.toJson(); + NeuralNetConfiguration conf2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, conf2); + } + + @Test + public void testUpsamplingConvnetJson() { + final int numRows = 76; + final int numColumns = 76; + int nChannels = 3; + int outputNum = 6; + int seed = 123; + + //setup the network + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) + .l1(1e-1).l2(2e-4).dropOut(0.5).miniBatch(true) + .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) + .layer(new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) + .activation(Activation.RELU).build()) + .layer(new Upsampling2D.Builder().size(2).build()) + .layer(2, + new ConvolutionLayer.Builder(3, 3).nOut(10).dropOut(0.5).weightInit(WeightInit.XAVIER) + .activation(Activation.RELU).build()) + .layer(new Upsampling2D.Builder().size(2).build()) + .layer(4, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) + .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) + .build()) + + .inputType(InputType.convolutional(numRows, numColumns, nChannels)); + + NeuralNetConfiguration conf = builder.build(); + String json = conf.toJson(); + NeuralNetConfiguration conf2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, conf2); + } + + @Test + public void testGlobalPoolingJson() { + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new NoOp()) + .dist(new NormalDistribution(0, 1.0)).seed(12345L) + .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(5).build()) + .layer(1, new 
GlobalPoolingLayer.Builder().poolingType(PoolingType.PNORM).pnorm(3).build())
+        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
+            .activation(Activation.SOFTMAX).nOut(3).build())
+        .inputType(InputType.convolutional(32, 32, 1)).build();
+
+    String str = conf.toJson();
+    NeuralNetConfiguration fromJson = NeuralNetConfiguration.fromJson(str);
+
+    assertEquals(conf, fromJson);
+  }
+
+  @Test
+  public void testYaml() throws Exception {
+    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
+        .layer(0, new DenseLayer.Builder().dist(new NormalDistribution(1, 1e-1)).build())
+        .inputPreProcessor(0, new CnnToFeedForwardPreProcessor()).build();
+    String json = conf.toYaml();
+    NeuralNetConfiguration from = NeuralNetConfiguration.fromYaml(json);
+    assertEquals(conf.getConf(0), from.getConf(0));
+
+    Properties props = new Properties();
+    props.put("json", json);
+    String key = props.getProperty("json");
+    assertEquals(json, key);
+    File f = new File(testDir, "props");
+    f.deleteOnExit();
+    BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(f));
+    props.store(bos, "");
+    bos.flush();
+    bos.close();
+    BufferedInputStream bis = new BufferedInputStream(new FileInputStream(f));
+    Properties props2 = new Properties();
+    props2.load(bis);
+    bis.close();
+    assertEquals(props2.getProperty("json"), props.getProperty("json"));
+    String yaml = props2.getProperty("json");
+    NeuralNetConfiguration conf3 = NeuralNetConfiguration.fromYaml(yaml);
+    assertEquals(conf.getConf(0), conf3.getConf(0));
+
+  }
+
+  @Test
+  public void testClone() {
+    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
+        .layer(0, new DenseLayer.Builder().build())
+        .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).build())
+        .inputPreProcessor(1, new CnnToFeedForwardPreProcessor()).build();
+
+    NeuralNetConfiguration conf2 = conf.clone();
+
+    assertEquals(conf, conf2);
+    assertNotSame(conf, conf2);
+    assertNotSame(conf.getNetConfigurations(), conf2.getNetConfigurations());
+    for (int i = 0; i < conf.getNetConfigurations().size(); i++) {
+      assertNotSame(conf.getConf(i), conf2.getConf(i));
+    }
+    assertNotSame(conf.getInputPreProcessors(), conf2.getInputPreProcessors());
+    for (Integer layer : conf.getInputPreProcessors().keySet()) {
+      assertNotSame(conf.getInputPreProcess(layer), conf2.getInputPreProcess(layer));
+    }
+  }
+
+  @Test
+  public void testRandomWeightInit() {
+    MultiLayerNetwork model1 = new MultiLayerNetwork(getConf());
+    model1.init();
+
+    Nd4j.getRandom().setSeed(12345L);
+    MultiLayerNetwork model2 = new MultiLayerNetwork(getConf());
+    model2.init();
+
+    float[] p1 = model1.params().data().asFloat();
+    float[] p2 = model2.params().data().asFloat();
+    System.out.println(Arrays.toString(p1));
+    System.out.println(Arrays.toString(p2));
+
+    org.junit.jupiter.api.Assertions.assertArrayEquals(p1, p2, 0.0f);
+  }
+
+  @Test
+  public void testTrainingListener() {
+    MultiLayerNetwork model1 = new MultiLayerNetwork(getConf());
+    model1.init();
+    model1.addListeners(new ScoreIterationListener(1));
+
+    MultiLayerNetwork model2 = new MultiLayerNetwork(getConf());
+    model2.addListeners(new ScoreIterationListener(1));
+    model2.init();
+
+    Layer[] l1 = model1.getLayers();
+    for (int i = 0; i < l1.length; i++) {
+      assertTrue(l1[i].getListeners() != null && l1[i].getListeners().size() == 1);
+    }
+
+    Layer[] l2 = model2.getLayers();
+    for (int i = 0; i < l2.length; i++) {
+      assertTrue(l2[i].getListeners() != null && l2[i].getListeners().size() == 1);
+    }
+  }
+
+  @Test
+  public void testInvalidConfig() {
+
+    try {
+      NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345)
+          .build();
+      MultiLayerNetwork net = new MultiLayerNetwork(conf);
+      net.init();
+      fail("No exception thrown for invalid configuration");
+    } catch (IllegalStateException e) {
+      //OK
+      log.error("", e);
+    } catch (Throwable e) {
+      log.error("", e);
+      fail("Unexpected exception thrown for invalid config");
     }
-  @Test
-  public void testConvnetJson() {
-    final int numRows = 76;
-    final int numColumns = 76;
-    int nChannels = 3;
-    int outputNum = 6;
-    int seed = 123;
-
-    //setup the network
-    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed)
-        .l1(1e-1).l2(2e-4).weightNoise(new DropConnect(0.5)).miniBatch(true)
-        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list()
-        .layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER)
-            .activation(Activation.RELU).build())
-        .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2})
-            .build())
-        .layer(2, new ConvolutionLayer.Builder(3, 3).nOut(10).dropOut(0.5).weightInit(WeightInit.XAVIER)
-            .activation(Activation.RELU).build())
-        .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2})
-            .build())
-        .layer(4, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build())
-        .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
-            .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX)
-            .build())
-
-        .setInputType(InputType.convolutional(numRows, numColumns, nChannels));
-
-    MultiLayerConfiguration conf = builder.build();
-    String json = conf.toJson();
-    MultiLayerConfiguration conf2 = MultiLayerConfiguration.fromJson(json);
-    assertEquals(conf, conf2);
+    try {
+      NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345)
+          .layer(1, new DenseLayer.Builder().nIn(3).nOut(4).build())
+          .layer(2, new OutputLayer.Builder().nIn(4).nOut(5).build())
+          .build();
+      MultiLayerNetwork net = new MultiLayerNetwork(conf);
+      net.init();
+      fail("No exception thrown for invalid configuration");
+    } catch (IllegalStateException e) {
+      //OK
+      log.info(e.toString());
+    } catch (Throwable e) {
+      log.error("", e);
+      fail("Unexpected exception thrown for invalid config");
     }
-  @Test
-  public void testUpsamplingConvnetJson() {
-    final int numRows = 76;
-    final int numColumns = 76;
-    int nChannels = 3;
-    int outputNum = 6;
-    int seed = 123;
-
-    //setup the network
-    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed)
-        .l1(1e-1).l2(2e-4).dropOut(0.5).miniBatch(true)
-        .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list()
-        .layer(new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER)
-            .activation(Activation.RELU).build())
-        .layer(new Upsampling2D.Builder().size(2).build())
-        .layer(2, new ConvolutionLayer.Builder(3, 3).nOut(10).dropOut(0.5).weightInit(WeightInit.XAVIER)
-            .activation(Activation.RELU).build())
-        .layer(new Upsampling2D.Builder().size(2).build())
-        .layer(4, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build())
-        .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
-            .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX)
-            .build())
-
-        .setInputType(InputType.convolutional(numRows, numColumns, nChannels));
-
-    MultiLayerConfiguration conf = builder.build();
-    String json = conf.toJson();
-    MultiLayerConfiguration conf2 = MultiLayerConfiguration.fromJson(json);
-    assertEquals(conf, conf2);
+    try {
+      NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345)
+          .layer(0, new DenseLayer.Builder().nIn(3).nOut(4).build())
+          .layer(2, new OutputLayer.Builder().nIn(4).nOut(5).build())
+          .build();
+      MultiLayerNetwork net = new MultiLayerNetwork(conf);
+      net.init();
+      fail("No exception thrown for invalid configuration");
+    } catch (IllegalStateException e) {
+      //OK
+      log.info(e.toString());
+    } catch (Throwable e) {
+      log.error("", e);
+      fail("Unexpected exception thrown for invalid config");
    }
+  }
-  @Test
-  public void testGlobalPoolingJson() {
-    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp())
-        .dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
-        .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(5).build())
-        .layer(1, new GlobalPoolingLayer.Builder().poolingType(PoolingType.PNORM).pnorm(3).build())
-        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
-            .activation(Activation.SOFTMAX).nOut(3).build())
-        .setInputType(InputType.convolutional(32, 32, 1)).build();
+  @Test
+  public void testListOverloads() {
-    String str = conf.toJson();
-    MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(str);
+    NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345)
+        .layer(0, new DenseLayer.Builder().nIn(3).nOut(4).build())
+        .layer(1, new OutputLayer.Builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build())
+        .build();
+    MultiLayerNetwork net = new MultiLayerNetwork(conf);
+    net.init();
-    assertEquals(conf, fromJson);
-  }
+    DenseLayer dl = (DenseLayer) conf.getConf(0).getLayer();
+    assertEquals(3, dl.getNIn());
+    assertEquals(4, dl.getNOut());
+    OutputLayer ol = (OutputLayer) conf.getConf(1).getLayer();
+    assertEquals(4, ol.getNIn());
+    assertEquals(5, ol.getNOut());
+
+    NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345)
+        .layer(0, new DenseLayer.Builder().nIn(3).nOut(4).build())
+        .layer(1, new OutputLayer.Builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build())
+        .build();
+    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
+    net2.init();
+
+    NeuralNetConfiguration conf3 = NeuralNetConfiguration.builder().seed(12345)
+        .layer(new DenseLayer.Builder().nIn(3).nOut(4).build())
+        .layer(
+            new OutputLayer.Builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build())
+        .build();
+    MultiLayerNetwork net3 = new MultiLayerNetwork(conf3);
+    net3.init();
+
+    assertEquals(conf, conf2);
+    assertEquals(conf, conf3);
+  }
-  @Test
-  public void testYaml() throws Exception {
-    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
-        .layer(0, new DenseLayer.Builder().dist(new NormalDistribution(1, 1e-1)).build())
-        .inputPreProcessor(0, new CnnToFeedForwardPreProcessor()).build();
-    String json = conf.toYaml();
-    MultiLayerConfiguration from = MultiLayerConfiguration.fromYaml(json);
-    assertEquals(conf.getConf(0), from.getConf(0));
+  @Test
+  public void testBiasLr() {
+    //setup the network
+    NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345)
+        .updater(new Adam(1e-2))
+        .biasUpdater(new Adam(0.5))
+        .layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).weightInit(WeightInit.XAVIER)
+            .activation(Activation.RELU).build())
+        .layer(1, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build())
+        .layer(2, new 
DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) + .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10) + .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) + .inputType(InputType.convolutional(28, 28, 1)).build(); - Properties props = new Properties(); - props.put("json", json); - String key = props.getProperty("json"); - assertEquals(json, key); - File f = new File(testDir, "props"); - f.deleteOnExit(); - BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(f)); - props.store(bos, ""); - bos.flush(); - bos.close(); - BufferedInputStream bis = new BufferedInputStream(new FileInputStream(f)); - Properties props2 = new Properties(); - props2.load(bis); - bis.close(); - assertEquals(props2.getProperty("json"), props.getProperty("json")); - String yaml = props2.getProperty("json"); - MultiLayerConfiguration conf3 = MultiLayerConfiguration.fromYaml(yaml); - assertEquals(conf.getConf(0), conf3.getConf(0)); + org.deeplearning4j.nn.conf.layers.BaseLayer l0 = (BaseLayer) conf.getConf(0).getLayer(); + org.deeplearning4j.nn.conf.layers.BaseLayer l1 = (BaseLayer) conf.getConf(1).getLayer(); + org.deeplearning4j.nn.conf.layers.BaseLayer l2 = (BaseLayer) conf.getConf(2).getLayer(); + org.deeplearning4j.nn.conf.layers.BaseLayer l3 = (BaseLayer) conf.getConf(3).getLayer(); - } + assertEquals(0.5, ((Adam) l0.getUpdaterByParam("b")).getLearningRate(), 1e-6); + assertEquals(1e-2, ((Adam) l0.getUpdaterByParam("W")).getLearningRate(), 1e-6); - @Test - public void testClone() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list().layer(0, new DenseLayer.Builder().build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).build()) - .inputPreProcessor(1, new CnnToFeedForwardPreProcessor()).build(); + assertEquals(0.5, ((Adam) l1.getUpdaterByParam("b")).getLearningRate(), 1e-6); + assertEquals(1e-2, ((Adam) l1.getUpdaterByParam("W")).getLearningRate(), 1e-6); - MultiLayerConfiguration conf2 = conf.clone(); + assertEquals(0.5, ((Adam) l2.getUpdaterByParam("b")).getLearningRate(), 1e-6); + assertEquals(1e-2, ((Adam) l2.getUpdaterByParam("W")).getLearningRate(), 1e-6); - assertEquals(conf, conf2); - assertNotSame(conf, conf2); - assertNotSame(conf.getConfs(), conf2.getConfs()); - for (int i = 0; i < conf.getConfs().size(); i++) { - assertNotSame(conf.getConf(i), conf2.getConf(i)); - } - assertNotSame(conf.getInputPreProcessors(), conf2.getInputPreProcessors()); - for (Integer layer : conf.getInputPreProcessors().keySet()) { - assertNotSame(conf.getInputPreProcess(layer), conf2.getInputPreProcess(layer)); - } - } - - @Test - public void testRandomWeightInit() { - MultiLayerNetwork model1 = new MultiLayerNetwork(getConf()); - model1.init(); - - Nd4j.getRandom().setSeed(12345L); - MultiLayerNetwork model2 = new MultiLayerNetwork(getConf()); - model2.init(); - - float[] p1 = model1.params().data().asFloat(); - float[] p2 = model2.params().data().asFloat(); - System.out.println(Arrays.toString(p1)); - System.out.println(Arrays.toString(p2)); - - org.junit.jupiter.api.Assertions.assertArrayEquals(p1, p2, 0.0f); - } - - @Test - public void testTrainingListener() { - MultiLayerNetwork model1 = new MultiLayerNetwork(getConf()); - model1.init(); - model1.addListeners( new ScoreIterationListener(1)); - - MultiLayerNetwork model2 = new MultiLayerNetwork(getConf()); - model2.addListeners( new ScoreIterationListener(1)); - model2.init(); - - Layer[] l1 = model1.getLayers(); - 
for (int i = 0; i < l1.length; i++) - assertTrue(l1[i].getListeners() != null && l1[i].getListeners().size() == 1); - - Layer[] l2 = model2.getLayers(); - for (int i = 0; i < l2.length; i++) - assertTrue(l2[i].getListeners() != null && l2[i].getListeners().size() == 1); - } + assertEquals(0.5, ((Adam) l3.getUpdaterByParam("b")).getLearningRate(), 1e-6); + assertEquals(1e-2, ((Adam) l3.getUpdaterByParam("W")).getLearningRate(), 1e-6); + } - private static MultiLayerConfiguration getConf() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2) - .dist(new NormalDistribution(0, 1)).build()) - .layer(1, new OutputLayer.Builder().nIn(2).nOut(1) - .activation(Activation.TANH) - .dist(new NormalDistribution(0, 1)).lossFunction(LossFunctions.LossFunction.MSE).build()) - .build(); - return conf; - } - - @Test - public void testInvalidConfig() { - - try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list() - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - fail("No exception thrown for invalid configuration"); - } catch (IllegalStateException e) { - //OK - log.error("",e); - } catch (Throwable e) { - log.error("",e); - fail("Unexpected exception thrown for invalid config"); - } - - try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list() - .layer(1, new DenseLayer.Builder().nIn(3).nOut(4).build()) - .layer(2, new OutputLayer.Builder().nIn(4).nOut(5).build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - fail("No exception thrown for invalid configuration"); - } catch (IllegalStateException e) { - //OK - log.info(e.toString()); - } catch (Throwable e) { - log.error("",e); - fail("Unexpected exception thrown for invalid config"); - } - - try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list() - .layer(0, new DenseLayer.Builder().nIn(3).nOut(4).build()) - .layer(2, new OutputLayer.Builder().nIn(4).nOut(5).build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - fail("No exception thrown for invalid configuration"); - } catch (IllegalStateException e) { - //OK - log.info(e.toString()); - } catch (Throwable e) { - log.error("",e); - fail("Unexpected exception thrown for invalid config"); - } - } - - @Test - public void testListOverloads() { - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list() - .layer(0, new DenseLayer.Builder().nIn(3).nOut(4).build()) - .layer(1, new OutputLayer.Builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - DenseLayer dl = (DenseLayer) conf.getConf(0).getLayer(); - assertEquals(3, dl.getNIn()); - assertEquals(4, dl.getNOut()); - OutputLayer ol = (OutputLayer) conf.getConf(1).getLayer(); - assertEquals(4, ol.getNIn()); - assertEquals(5, ol.getNOut()); - - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345).list() - .layer(0, new DenseLayer.Builder().nIn(3).nOut(4).build()) - .layer(1, new OutputLayer.Builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build()) - .build(); - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); - net2.init(); - - MultiLayerConfiguration conf3 = new NeuralNetConfiguration.Builder().seed(12345) - .list(new DenseLayer.Builder().nIn(3).nOut(4).build(), - new 
OutputLayer.Builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build()) - .build(); - MultiLayerNetwork net3 = new MultiLayerNetwork(conf3); - net3.init(); - - - assertEquals(conf, conf2); - assertEquals(conf, conf3); - } - - - @Test - public void testBiasLr() { - //setup the network - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new Adam(1e-2)) - .biasUpdater(new Adam(0.5)).list() - .layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).weightInit(WeightInit.XAVIER) - .activation(Activation.RELU).build()) - .layer(1, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) - .layer(2, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10) - .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(28, 28, 1)).build(); - - org.deeplearning4j.nn.conf.layers.BaseLayer l0 = (BaseLayer) conf.getConf(0).getLayer(); - org.deeplearning4j.nn.conf.layers.BaseLayer l1 = (BaseLayer) conf.getConf(1).getLayer(); - org.deeplearning4j.nn.conf.layers.BaseLayer l2 = (BaseLayer) conf.getConf(2).getLayer(); - org.deeplearning4j.nn.conf.layers.BaseLayer l3 = (BaseLayer) conf.getConf(3).getLayer(); - - assertEquals(0.5, ((Adam)l0.getUpdaterByParam("b")).getLearningRate(), 1e-6); - assertEquals(1e-2, ((Adam)l0.getUpdaterByParam("W")).getLearningRate(), 1e-6); - - assertEquals(0.5, ((Adam)l1.getUpdaterByParam("b")).getLearningRate(), 1e-6); - assertEquals(1e-2, ((Adam)l1.getUpdaterByParam("W")).getLearningRate(), 1e-6); - - assertEquals(0.5, ((Adam)l2.getUpdaterByParam("b")).getLearningRate(), 1e-6); - assertEquals(1e-2, ((Adam)l2.getUpdaterByParam("W")).getLearningRate(), 1e-6); - - assertEquals(0.5, ((Adam)l3.getUpdaterByParam("b")).getLearningRate(), 1e-6); - assertEquals(1e-2, ((Adam)l3.getUpdaterByParam("W")).getLearningRate(), 1e-6); - } - - - @Test - public void testInvalidOutputLayer(){ + @Test + public void testInvalidOutputLayer() { /* Test case (invalid configs) 1. nOut=1 + softmax @@ -393,37 +414,44 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { 5. 
mcxent + sigmoid */ - LossFunctions.LossFunction[] lf = new LossFunctions.LossFunction[]{ - LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.XENT, - LossFunctions.LossFunction.XENT, LossFunctions.LossFunction.MCXENT}; - int[] nOut = new int[]{1, 3, 3, 3, 3}; - Activation[] activations = new Activation[]{Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.RELU, Activation.SIGMOID}; - for( int i=0; i r = net.getLayer(0).conf().getLayer().getRegularizationByParam("b"); + assertEquals(l1, TestUtils.getL1(net.getLayer(0).getLayerConfiguration().getRegularizationByParam("W")), 1e-4); + List r = net.getLayer(0).getLayerConfiguration().getRegularizationByParam("b"); assertEquals(0, r.size()); - r = net.getLayer(1).conf().getLayer().getRegularizationByParam("beta"); + r = net.getLayer(1).getLayerConfiguration().getRegularizationByParam("beta"); assertTrue(r == null || r.isEmpty()); - r = net.getLayer(1).conf().getLayer().getRegularizationByParam("gamma"); + r = net.getLayer(1).getLayerConfiguration().getRegularizationByParam("gamma"); assertTrue(r == null || r.isEmpty()); - r = net.getLayer(1).conf().getLayer().getRegularizationByParam("mean"); + r = net.getLayer(1).getLayerConfiguration().getRegularizationByParam("mean"); assertTrue(r == null || r.isEmpty()); - r = net.getLayer(1).conf().getLayer().getRegularizationByParam("var"); + r = net.getLayer(1).getLayerConfiguration().getRegularizationByParam("var"); assertTrue(r == null || r.isEmpty()); - assertEquals(l2, TestUtils.getL2(net.getLayer(2).conf().getLayer().getRegularizationByParam("W")), 1e-4); - r = net.getLayer(2).conf().getLayer().getRegularizationByParam("b"); + assertEquals(l2, TestUtils.getL2(net.getLayer(2).getLayerConfiguration().getRegularizationByParam("W")), 1e-4); + r = net.getLayer(2).getLayerConfiguration().getRegularizationByParam("b"); assertTrue(r == null || r.isEmpty()); } @@ -322,7 +322,7 @@ public class NeuralNetConfigurationTest extends BaseDL4JTest { .nIn(10).nOut(5).updater(new Sgd(1e-1)) .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build(); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().seed(42).layer(layer).build(); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(42).layer(layer).build(); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java index 37260087d..afbb64726 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java @@ -26,7 +26,6 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.BackpropType; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.constraint.MaxNormConstraint; import org.deeplearning4j.nn.conf.constraint.MinMaxNormConstraint; @@ -68,10 +67,10 @@ public class TestConstraints extends BaseDL4JTest { for (LayerConstraint lc : constraints) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd(0.0)) .dist(new 
NormalDistribution(0, 5)) - .list() + .layer(new LSTM.Builder().nIn(12).nOut(10) .constrainRecurrent(lc).build()) .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) @@ -81,7 +80,7 @@ public class TestConstraints extends BaseDL4JTest { net.init(); LayerConstraint exp = lc.clone(); - assertEquals(exp.toString(), net.getLayer(0).conf().getLayer().getConstraints().get(0).toString()); + assertEquals(exp.toString(), net.getLayer(0).getLayerConfiguration().getConstraints().get(0).toString()); INDArray input = Nd4j.rand(3, 12); INDArray labels = Nd4j.rand(3, 8); @@ -120,11 +119,11 @@ public class TestConstraints extends BaseDL4JTest { for (LayerConstraint lc : constraints) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd(0.0)) .dist(new NormalDistribution(0, 5)) .biasInit(10.0) - .list() + .layer(new DenseLayer.Builder().nIn(12).nOut(10) .constrainBias(lc).build()) .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) @@ -134,7 +133,7 @@ public class TestConstraints extends BaseDL4JTest { net.init(); LayerConstraint exp = lc.clone(); - assertEquals(exp.toString(), net.getLayer(0).conf().getLayer().getConstraints().get(0).toString()); + assertEquals(exp.toString(), net.getLayer(0).getLayerConfiguration().getConstraints().get(0).toString()); INDArray input = Nd4j.rand(3, 12); INDArray labels = Nd4j.rand(3, 8); @@ -173,10 +172,10 @@ public class TestConstraints extends BaseDL4JTest { for (LayerConstraint lc : constraints) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd(0.0)) .dist(new NormalDistribution(0, 5)) - .list() + .layer(new DenseLayer.Builder().nIn(12).nOut(10) .constrainWeights(lc).build()) .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) @@ -186,7 +185,7 @@ public class TestConstraints extends BaseDL4JTest { net.init(); LayerConstraint exp = lc.clone(); - assertEquals(exp.toString(), net.getLayer(0).conf().getLayer().getConstraints().get(0).toString()); + assertEquals(exp.toString(), net.getLayer(0).getLayerConfiguration().getConstraints().get(0).toString()); INDArray input = Nd4j.rand(3, 12); INDArray labels = Nd4j.rand(3, 8); @@ -225,11 +224,11 @@ public class TestConstraints extends BaseDL4JTest { for (LayerConstraint lc : constraints) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd(0.0)) .dist(new NormalDistribution(0, 5)) .biasInit(0.2) - .list() + .layer(new DenseLayer.Builder().nIn(12).nOut(10) .constrainAllParameters(lc).build()) .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) @@ -239,7 +238,7 @@ public class TestConstraints extends BaseDL4JTest { net.init(); LayerConstraint exp = lc.clone(); - assertEquals(exp.toString(), net.getLayer(0).conf().getLayer().getConstraints().get(0).toString()); + assertEquals(exp.toString(), net.getLayer(0).getLayerConfiguration().getConstraints().get(0).toString()); INDArray input = Nd4j.rand(3, 12); INDArray labels = Nd4j.rand(3, 8); @@ -286,11 +285,11 @@ public class TestConstraints extends BaseDL4JTest { for (LayerConstraint lc : constraints) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + 
NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd(0.0)) .dist(new NormalDistribution(0, 5)) .biasInit(0.2) - .list() + .layer(new DenseLayer.Builder().nIn(12).nOut(10) .constrainWeights(lc).constrainBias(lc).build()) .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) @@ -300,7 +299,7 @@ public class TestConstraints extends BaseDL4JTest { net.init(); LayerConstraint exp = lc.clone(); - assertEquals(exp.toString(), net.getLayer(0).conf().getLayer().getConstraints().get(0).toString()); + assertEquals(exp.toString(), net.getLayer(0).getLayerConfiguration().getConstraints().get(0).toString()); INDArray input = Nd4j.rand(3, 12); INDArray labels = Nd4j.rand(3, 8); @@ -346,12 +345,12 @@ public class TestConstraints extends BaseDL4JTest { for(LayerConstraint lc : constraints){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .constrainWeights(lc) .updater(new Sgd(0.0)) .dist(new NormalDistribution(0,5)) .biasInit(1) - .list() + .layer(new DenseLayer.Builder().nIn(12).nOut(10).build()) .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) .build(); @@ -360,8 +359,8 @@ public class TestConstraints extends BaseDL4JTest { net.init(); LayerConstraint exp = lc.clone(); - assertEquals(exp.toString(), net.getLayer(0).conf().getLayer().getConstraints().get(0).toString()); - assertEquals(exp.toString(), net.getLayer(1).conf().getLayer().getConstraints().get(0).toString()); + assertEquals(exp.toString(), net.getLayer(0).getLayerConfiguration().getConstraints().get(0).toString()); + assertEquals(exp.toString(), net.getLayer(1).getLayerConfiguration().getConstraints().get(0).toString()); INDArray input = Nd4j.rand(3, 12); INDArray labels = Nd4j.rand(3, 8); @@ -400,7 +399,7 @@ public class TestConstraints extends BaseDL4JTest { int nIn = 10; int lstmLayerSize = 32; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.RELU_UNIFORM) .updater(new RmsProp(learningRate)) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java index 5c06f2adc..26c266dc7 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java @@ -25,7 +25,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.iterator.ExistingDataSetIterator; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.LayerVertex; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -60,21 +59,21 @@ public class TestDropout extends BaseDL4JTest { @Test public void testBasicConfig(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dropOut(0.6) - .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(new DenseLayer.Builder().nIn(10).nOut(10).dropOut(0.7).build()) 
.layer(new DenseLayer.Builder().nIn(10).nOut(10).dropOut(new AlphaDropout(0.5)).build()) .build(); - assertEquals(new Dropout(0.6), conf.getConf(0).getLayer().getIDropout()); - assertEquals(new Dropout(0.7), conf.getConf(1).getLayer().getIDropout()); - assertEquals(new AlphaDropout(0.5), conf.getConf(2).getLayer().getIDropout()); + assertEquals(new Dropout(0.6), conf.getFlattenedLayerConfigurations().get(0).getIDropout()); + assertEquals(new Dropout(0.7), conf.getFlattenedLayerConfigurations().get(1).getIDropout()); + assertEquals(new AlphaDropout(0.5), conf.getFlattenedLayerConfigurations().get(2).getIDropout()); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() - .dropOut(0.6) + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() + .dropOut( new Dropout(0.6)) .graphBuilder() .addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") @@ -83,9 +82,9 @@ public class TestDropout extends BaseDL4JTest { .setOutputs("2") .build(); - assertEquals(new Dropout(0.6), ((LayerVertex)conf2.getVertices().get("0")).getLayerConf().getLayer().getIDropout()); - assertEquals(new Dropout(0.7), ((LayerVertex)conf2.getVertices().get("1")).getLayerConf().getLayer().getIDropout()); - assertEquals(new AlphaDropout(0.5), ((LayerVertex)conf2.getVertices().get("2")).getLayerConf().getLayer().getIDropout()); + assertEquals(new Dropout(0.6), ((LayerVertex)conf2.getVertices().get("0")).getNetConfiguration().getFirstLayer().getIDropout()); + assertEquals(new Dropout(0.7), ((LayerVertex)conf2.getVertices().get("1")).getNetConfiguration().getFirstLayer().getIDropout()); + assertEquals(new AlphaDropout(0.5), ((LayerVertex)conf2.getVertices().get("2")).getNetConfiguration().getFirstLayer().getIDropout()); } @Test @@ -94,8 +93,8 @@ public class TestDropout extends BaseDL4JTest { CustomDropout d1 = new CustomDropout(); CustomDropout d2 = new CustomDropout(); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .layer(new DenseLayer.Builder().nIn(4).nOut(3).dropOut(d1).build()) .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MSE).dropOut(d2).nIn(3).nOut(3).build()) .build(); @@ -129,7 +128,7 @@ public class TestDropout extends BaseDL4JTest { d1 = new CustomDropout(); d2 = new CustomDropout(); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).dropOut(d1).build(), "in") @@ -186,9 +185,9 @@ public class TestDropout extends BaseDL4JTest { for(IDropout id : dropouts) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dropOut(id) - .list() + .layer(new DenseLayer.Builder().nIn(4).nOut(3).build()) .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(3).nOut(3).build()) .build(); @@ -197,7 +196,7 @@ public class TestDropout extends BaseDL4JTest { TestUtils.testModelSerialization(net); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .dropOut(id) .graphBuilder() .addInputs("in") @@ -601,13 +600,13 @@ public class TestDropout extends BaseDL4JTest { @Test public void testSpatialDropoutJSON(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() + 
NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .layer(new DropoutLayer.Builder(new SpatialDropout(0.5)).build()) .build(); String asJson = conf.toJson(); - MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(asJson); + NeuralNetConfiguration fromJson = NeuralNetConfiguration.fromJson(asJson); assertEquals(conf, fromJson); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java index 046cf0f63..02babc8bc 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java @@ -70,7 +70,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { public void testElementWiseVertexForwardAdd() { int batchsz = 24; int featuresz = 17; - ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().graphBuilder() + ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder().graphBuilder() .addInputs("input1", "input2", "input3") .addLayer("denselayer", new DenseLayer.Builder().nIn(featuresz).nOut(1).activation(Activation.IDENTITY) @@ -111,7 +111,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { public void testElementWiseVertexForwardProduct() { int batchsz = 24; int featuresz = 17; - ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().graphBuilder() + ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder().graphBuilder() .addInputs("input1", "input2", "input3") .addLayer("denselayer", new DenseLayer.Builder().nIn(featuresz).nOut(1).activation(Activation.IDENTITY) @@ -152,7 +152,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { public void testElementWiseVertexForwardSubtract() { int batchsz = 24; int featuresz = 17; - ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().graphBuilder() + ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder().graphBuilder() .addInputs("input1", "input2") .addLayer("denselayer", new DenseLayer.Builder().nIn(featuresz).nOut(1).activation(Activation.IDENTITY) @@ -194,7 +194,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { int featuresz = 17; int midsz = 13; int outputsz = 11; - ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .dataType(DataType.DOUBLE) .biasInit(0.0).updater(new Sgd()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() @@ -370,7 +370,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { int featuresz = 17; int midsz = 13; int outputsz = 11; - ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .dataType(DataType.DOUBLE) .biasInit(0.0).updater(new Sgd()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() @@ -545,7 +545,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { int featuresz = 17; int midsz = 13; int outputsz = 11; - ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + ComputationGraphConfiguration cgc = 
NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .dataType(DataType.DOUBLE) .biasInit(0.0).updater(new Sgd()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java index acab33814..cf0e743e6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java @@ -84,7 +84,7 @@ public class ShiftVertexTest extends BaseDL4JTest { INDArray input = Nd4j .create(new double[][] {{0.2, 0.3, 0.5}, {0.7, 1.1, 1.3}, {1.7, 1.9, 2.3}, {2.9, 3.1, 3.7}}); double sf = 4.1; - ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("input") + ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder().graphBuilder().addInputs("input") .addLayer("denselayer", new DenseLayer.Builder().nIn(input.columns()).nOut(1) .activation(Activation.IDENTITY).build(), @@ -138,7 +138,7 @@ public class ShiftVertexTest extends BaseDL4JTest { INDArray target = Nd4j.create(new double[][] {{0.05, 0.10, 0.15, 0.20, 0.25}, {0.30, 0.35, 0.40, 0.45, 0.50}, {0.55, 0.60, 0.65, 0.70, 0.75}, {0.80, 0.85, 0.90, 0.95, 0.99}}); - ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .dataType(DataType.DOUBLE) .updater(new Sgd(0.01)) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java index 484da1ff9..e4e7ce73c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java @@ -199,8 +199,8 @@ public class LayerBuilderTest extends BaseDL4JTest { assertEquals(act, activationLayer.activationFn); } - private void checkSerialization(Layer layer) throws Exception { - NeuralNetConfiguration confExpected = new NeuralNetConfiguration.Builder().layer(layer).build(); + private void checkSerialization(LayerConfiguration layer) throws Exception { + NeuralNetConfiguration confExpected = NeuralNetConfiguration.builder().layer(layer).build(); NeuralNetConfiguration confActual; // check Java serialization @@ -212,21 +212,21 @@ public class LayerBuilderTest extends BaseDL4JTest { try (ByteArrayInputStream bis = new ByteArrayInputStream(data); ObjectInput in = new ObjectInputStream(bis)) { confActual = (NeuralNetConfiguration) in.readObject(); } - assertEquals(confExpected.getLayer(), confActual.getLayer(), "unequal Java serialization"); + assertEquals(confExpected.getFirstLayer(), confActual.getFirstLayer(), "unequal Java serialization"); // check JSON String json = confExpected.toJson(); confActual = NeuralNetConfiguration.fromJson(json); - assertEquals(confExpected.getLayer(), confActual.getLayer(), "unequal JSON serialization"); + assertEquals(confExpected.getFirstLayer(), confActual.getFirstLayer(), "unequal JSON serialization"); // check YAML String yaml = confExpected.toYaml(); confActual = 
NeuralNetConfiguration.fromYaml(yaml); - assertEquals(confExpected.getLayer(), confActual.getLayer(), "unequal YAML serialization"); + assertEquals(confExpected.getFirstLayer(), confActual.getFirstLayer(), "unequal YAML serialization"); // check the layer's use of callSuper on equals method - confActual.getLayer().setIDropout(new Dropout(new java.util.Random().nextDouble())); - assertNotEquals( confExpected.getLayer(), confActual.getLayer(), "broken equals method (missing callSuper?)"); + confActual.getFirstLayer().setIDropout(new Dropout(new java.util.Random().nextDouble())); + assertNotEquals( confExpected.getFirstLayer(), confActual.getFirstLayer(), "broken equals method (missing callSuper?)"); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java index be25a0ccd..db3731f6d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.conf.layers; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -53,7 +52,7 @@ public class LayerConfigTest extends BaseDL4JTest { String name1 = "genisys"; String name2 = "bill"; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).name(name1).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).name(name2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -67,7 +66,7 @@ public class LayerConfigTest extends BaseDL4JTest { @Test public void testActivationLayerwiseOverride() { //Without layerwise override: - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.RELU).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.RELU) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -77,7 +76,7 @@ public class LayerConfigTest extends BaseDL4JTest { assertEquals("relu", ((BaseLayer) conf.getConf(1).getLayer()).getActivationFn().toString()); //With - conf = new NeuralNetConfiguration.Builder().activation(Activation.RELU).list() + conf = NeuralNetConfiguration.builder().activation(Activation.RELU) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).activation(Activation.TANH).build()).build(); @@ -93,8 +92,8 @@ public class LayerConfigTest extends BaseDL4JTest { public void testWeightBiasInitLayerwiseOverride() { //Without layerwise override: final Distribution defaultDistribution = new NormalDistribution(0, 1.0); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .dist(defaultDistribution).biasInit(1).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .dist(defaultDistribution).biasInit(1) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new 
DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -108,8 +107,8 @@ public class LayerConfigTest extends BaseDL4JTest { //With: final Distribution overriddenDistribution = new UniformDistribution(0, 1); - conf = new NeuralNetConfiguration.Builder() - .dist(defaultDistribution).biasInit(1).list() + conf = NeuralNetConfiguration.builder() + .dist(defaultDistribution).biasInit(1) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()).layer(1, new DenseLayer.Builder().nIn(2).nOut(2) .dist(overriddenDistribution).biasInit(0).build()) @@ -132,7 +131,7 @@ public class LayerConfigTest extends BaseDL4JTest { // the global config, and check they actually work. //Learning rate without layerwise override: - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().learningRate(0.3) .layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -142,7 +141,7 @@ public class LayerConfigTest extends BaseDL4JTest { assertEquals(0.3, ((BaseLayer) conf.getConf(1).getLayer()).getLearningRate(), 0.0); //With: - conf = new NeuralNetConfiguration.Builder().learningRate(0.3).list() + conf = NeuralNetConfiguration.builder().learningRate(0.3) .layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).learningRate(0.2).build()).build(); @@ -153,7 +152,7 @@ public class LayerConfigTest extends BaseDL4JTest { assertEquals(0.2, ((BaseLayer) conf.getConf(1).getLayer()).getLearningRate(), 0.0); //L1 and L2 without layerwise override: - conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.2).list() + conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.2) .layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); net = new MultiLayerNetwork(conf); @@ -165,7 +164,7 @@ public class LayerConfigTest extends BaseDL4JTest { assertEquals(0.2, ((BaseLayer) conf.getConf(1).getLayer()).getL2(), 0.0); //L1 and L2 with layerwise override: - conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.2).list() + conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.2) .layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).l1(0.9).build()) .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).l2(0.8).build()).build(); net = new MultiLayerNetwork(conf); @@ -181,7 +180,7 @@ public class LayerConfigTest extends BaseDL4JTest { @Test public void testDropoutLayerwiseOverride() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().dropOut(1.0).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().dropOut(1.0) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -190,7 +189,7 @@ public class LayerConfigTest extends BaseDL4JTest { assertEquals(new Dropout(1.0), conf.getConf(0).getLayer().getIDropout()); assertEquals(new Dropout(1.0), conf.getConf(1).getLayer().getIDropout()); - conf = new NeuralNetConfiguration.Builder().dropOut(1.0).list() + conf = NeuralNetConfiguration.builder().dropOut(1.0) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new 
DenseLayer.Builder().nIn(2).nOut(2).dropOut(2.0).build()).build(); @@ -206,9 +205,9 @@ public class LayerConfigTest extends BaseDL4JTest { Map testMomentumAfter = new HashMap<>(); testMomentumAfter.put(0, 0.1); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter))) - .list() + .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -220,8 +219,8 @@ public class LayerConfigTest extends BaseDL4JTest { Map testMomentumAfter2 = new HashMap<>(); testMomentumAfter2.put(0, 0.2); - conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter) )) - .list() + conf = NeuralNetConfiguration.builder().updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter) )) + .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()).layer(1, new DenseLayer.Builder() .nIn(2).nOut(2).updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter2))).build()) .build(); @@ -234,7 +233,7 @@ public class LayerConfigTest extends BaseDL4JTest { @Test public void testUpdaterRhoRmsDecayLayerwiseOverride() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new AdaDelta(0.5, 0.9)).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new AdaDelta(0.5, 0.9)) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new AdaDelta(0.01,0.9)).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -245,7 +244,7 @@ public class LayerConfigTest extends BaseDL4JTest { assertEquals(0.5, ((AdaDelta)((BaseLayer) conf.getConf(0).getLayer()).getIUpdater()).getRho(), 0.0); assertEquals(0.01, ((AdaDelta)((BaseLayer) conf.getConf(1).getLayer()).getIUpdater()).getRho(), 0.0); - conf = new NeuralNetConfiguration.Builder().updater(new RmsProp(1.0, 2.0, RmsProp.DEFAULT_RMSPROP_EPSILON)).list() + conf = NeuralNetConfiguration.builder().updater(new RmsProp(1.0, 2.0, RmsProp.DEFAULT_RMSPROP_EPSILON)) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).updater(new RmsProp(1.0, 1.0, RmsProp.DEFAULT_RMSPROP_EPSILON)).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new AdaDelta(0.5,AdaDelta.DEFAULT_ADADELTA_EPSILON)).build()) .build(); @@ -262,9 +261,9 @@ public class LayerConfigTest extends BaseDL4JTest { @Test public void testUpdaterAdamParamsLayerwiseOverride() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1.0, 0.5, 0.5, 1e-8)) - .list() + .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new Adam(1.0, 0.6, 0.7, 1e-8)).build()) .build(); @@ -281,9 +280,9 @@ public class LayerConfigTest extends BaseDL4JTest { public void testGradientNormalizationLayerwiseOverride() { //Learning rate without layerwise override: - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).list() + .gradientNormalizationThreshold(10) .layer(0, new 
DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -297,9 +296,9 @@ public class LayerConfigTest extends BaseDL4JTest { assertEquals(10, conf.getConf(1).getLayer().getGradientNormalizationThreshold(), 0.0); //With: - conf = new NeuralNetConfiguration.Builder() + conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).list() + .gradientNormalizationThreshold(10) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2) .gradientNormalization(GradientNormalization.None) @@ -323,7 +322,7 @@ public class LayerConfigTest extends BaseDL4JTest { double lr = 2; double lrDecayRate = 5; int iterations = 1; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().learningRate(lr) .updater(Updater.SGD) .learningRateDecayPolicy(LearningRatePolicy.Exponential).lrPolicyDecayRate(lrDecayRate).list() .layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) @@ -343,7 +342,7 @@ public class LayerConfigTest extends BaseDL4JTest { double lrDecayRate = 5; double power = 3; int iterations = 1; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().iterations(iterations).learningRate(lr) .learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(lrDecayRate) .lrPolicyPower(power).list().layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); @@ -365,7 +364,7 @@ public class LayerConfigTest extends BaseDL4JTest { double lrDecayRate = 5; double steps = 4; int iterations = 1; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().iterations(iterations).learningRate(lr) .learningRateDecayPolicy(LearningRatePolicy.Step).lrPolicyDecayRate(lrDecayRate) .lrPolicySteps(steps).list().layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); @@ -386,7 +385,7 @@ public class LayerConfigTest extends BaseDL4JTest { double lrDecayRate = 5; double power = 3; int iterations = 1; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().iterations(iterations).learningRate(lr) .learningRateDecayPolicy(LearningRatePolicy.Poly).lrPolicyDecayRate(lrDecayRate) .lrPolicyPower(power).list().layer(0, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); @@ -407,7 +406,7 @@ public class LayerConfigTest extends BaseDL4JTest { double lrDecayRate = 5; double steps = 4; int iterations = 1; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(iterations).learningRate(lr) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().iterations(iterations).learningRate(lr) .learningRateDecayPolicy(LearningRatePolicy.Sigmoid).lrPolicyDecayRate(lrDecayRate) .lrPolicySteps(steps).list().layer(0, new 
DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayerConfiguration.Builder().nIn(2).nOut(2).build()).build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java index 4b60f98c4..65532a0bc 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java @@ -24,7 +24,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; @@ -56,8 +55,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { @Test public void testDropConnect() { // Warning thrown only since some layers may not have l1 or l2 - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)).weightNoise(new DropConnect(0.5)) - .list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)).weightNoise(new DropConnect(0.5)) + .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -67,8 +66,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { @Test public void testL1L2NotSet() { // Warning thrown only since some layers may not have l1 or l2 - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)) - .list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.3)) + .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -78,7 +77,7 @@ public class LayerConfigValidationTest extends BaseDL4JTest { //@Ignore //Old assumption: throw exception on l1 but no regularization. Current design: warn, not exception public void testRegNotSetL1Global() { assertThrows(IllegalStateException.class, () -> { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)).l1(0.5).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.3)).l1(0.5) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -90,7 +89,7 @@ public class LayerConfigValidationTest extends BaseDL4JTest { //@Ignore //Old assumption: throw exception on l1 but no regularization. 
Current design: warn, not exception public void testRegNotSetL2Local() { assertThrows(IllegalStateException.class, () -> { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.3)) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -101,9 +100,9 @@ public class LayerConfigValidationTest extends BaseDL4JTest { @Test public void testWeightInitDistNotSet() { // Warning thrown only since global dist can be set with a different weight init locally - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new Sgd(0.3)).dist(new GaussianDistribution(1e-3, 2)) - .list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new Sgd(0.3)).dist(new GaussianDistribution(1e-3, 2)) + .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -115,8 +114,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { Map testMomentumAfter = new HashMap<>(); testMomentumAfter.put(0, 0.1); - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter))).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter))) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -125,11 +124,11 @@ public class LayerConfigValidationTest extends BaseDL4JTest { @Test public void testCompGraphNullLayer() { - ComputationGraphConfiguration.GraphBuilder gb = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration.GraphBuilder gb = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.01)) .seed(42).miniBatch(false).l1(0.2).l2(0.2) /* Graph Builder */ - .updater(Updater.RMSPROP).graphBuilder().addInputs("in") + .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).graphBuilder().addInputs("in") .addLayer("L" + 1, new GravesLSTM.Builder().nIn(20).updater(Updater.RMSPROP).nOut(10) .weightInit(WeightInit.XAVIER) @@ -157,33 +156,33 @@ public class LayerConfigValidationTest extends BaseDL4JTest { double expectedL2 = 0.0; // Nesterovs Updater - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(0.9)) - .list().layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Nesterovs(0.9)) + .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new Nesterovs(0.3, 0.4)).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - BaseLayer layerConf = (BaseLayer) net.getLayer(0).conf().getLayer(); + BaseLayer layerConf = (BaseLayer) net.getLayer(0).getLayerConfiguration(); assertEquals(expectedMomentum, ((Nesterovs) layerConf.getIUpdater()).getMomentum(), 1e-3); assertNull(TestUtils.getL1Reg(layerConf.getRegularization())); 
assertEquals(0.5, TestUtils.getL2(layerConf), 1e-3); - BaseLayer layerConf1 = (BaseLayer) net.getLayer(1).conf().getLayer(); + BaseLayer layerConf1 = (BaseLayer) net.getLayer(1).getLayerConfiguration(); assertEquals(0.4, ((Nesterovs) layerConf1.getIUpdater()).getMomentum(), 1e-3); // Adam Updater - conf = new NeuralNetConfiguration.Builder().updater(new Adam(0.3)) - .weightInit(new WeightInitDistribution(expectedDist)).list() + conf = NeuralNetConfiguration.builder().updater(new Adam(0.3)) + .weightInit(expectedDist) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).l1(0.3).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); net = new MultiLayerNetwork(conf); net.init(); - layerConf = (BaseLayer) net.getLayer(0).conf().getLayer(); + layerConf = (BaseLayer) net.getLayer(0).getLayerConfiguration(); assertEquals(0.3, TestUtils.getL1(layerConf), 1e-3); assertEquals(0.5, TestUtils.getL2(layerConf), 1e-3); - layerConf1 = (BaseLayer) net.getLayer(1).conf().getLayer(); + layerConf1 = (BaseLayer) net.getLayer(1).getLayerConfiguration(); assertEquals(expectedAdamMeanDecay, ((Adam) layerConf1.getIUpdater()).getBeta1(), 1e-3); assertEquals(expectedAdamVarDecay, ((Adam) layerConf1.getIUpdater()).getBeta2(), 1e-3); assertEquals(new WeightInitDistribution(expectedDist), layerConf1.getWeightInitFn()); @@ -191,18 +190,18 @@ public class LayerConfigValidationTest extends BaseDL4JTest { assertNull(TestUtils.getL2Reg(layerConf1.getRegularization())); //RMSProp Updater - conf = new NeuralNetConfiguration.Builder().updater(new RmsProp(0.3)).list() + conf = NeuralNetConfiguration.builder().updater(new RmsProp(0.3)) .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new RmsProp(0.3, 0.4, RmsProp.DEFAULT_RMSPROP_EPSILON)).build()).build(); net = new MultiLayerNetwork(conf); net.init(); - layerConf = (BaseLayer) net.getLayer(0).conf().getLayer(); + layerConf = (BaseLayer) net.getLayer(0).getLayerConfiguration(); assertEquals(expectedRmsDecay, ((RmsProp) layerConf.getIUpdater()).getRmsDecay(), 1e-3); assertNull(TestUtils.getL1Reg(layerConf.getRegularization())); assertNull(TestUtils.getL2Reg(layerConf.getRegularization())); - layerConf1 = (BaseLayer) net.getLayer(1).conf().getLayer(); + layerConf1 = (BaseLayer) net.getLayer(1).getLayerConfiguration(); assertEquals(0.4, ((RmsProp) layerConf1.getIUpdater()).getRmsDecay(), 1e-3); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CNNProcessorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CNNProcessorTest.java index 48112c682..d530e416d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CNNProcessorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CNNProcessorTest.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.preprocessor; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -233,64 +234,60 @@ public class CNNProcessorTest extends BaseDL4JTest { @Test public void testInvalidInputShape(){ - NeuralNetConfiguration.Builder builder = new 
NeuralNetConfiguration.Builder() - .seed(123) - .miniBatch(true) - .cacheMode(CacheMode.DEVICE) - .updater(new Nesterovs(0.9)) - .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); int[] kernelArray = new int[]{3,3}; int[] strideArray = new int[]{1,1}; int[] zeroPaddingArray = new int[]{0,0}; int processWidth = 4; - NeuralNetConfiguration.ListBuilder listBuilder = builder.list(); // Building the DL4J network - listBuilder = listBuilder.layer(0, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .seed(123) + .miniBatch(true) + .cacheMode(CacheMode.DEVICE) + .updater(new Nesterovs(0.9)) + .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + // Building the DL4J network + .layer(0, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) .name("cnn1") .convolutionMode(ConvolutionMode.Strict) .nIn(2) // 2 input channels .nOut(processWidth) .weightInit(WeightInit.XAVIER_UNIFORM) .activation(Activation.RELU) - .biasInit(1e-2).build()); + .biasInit(1e-2).build()) - listBuilder = listBuilder.layer(1, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) + .layer(1, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) .name("cnn2") .convolutionMode(ConvolutionMode.Strict) .nOut(processWidth) .weightInit(WeightInit.XAVIER_UNIFORM) .activation(Activation.RELU) .biasInit(1e-2) - .build()); + .build()) - listBuilder = listBuilder.layer(2, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) + .layer(2, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) .name("cnn3") .convolutionMode(ConvolutionMode.Strict) .nOut(processWidth) .weightInit(WeightInit.XAVIER_UNIFORM) - .activation(Activation.RELU).build()); + .activation(Activation.RELU).build()) - listBuilder = listBuilder.layer(3, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) + .layer(3, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) .name("cnn4") .convolutionMode(ConvolutionMode.Strict) .nOut(processWidth) .weightInit(WeightInit.XAVIER_UNIFORM) - .activation(Activation.RELU).build()); + .activation(Activation.RELU).build()) - listBuilder = listBuilder - .layer(4, new OutputLayer.Builder(LossFunctions.LossFunction.MSE) + .layer(4, new OutputLayer.Builder(LossFunctions.LossFunction.MSE) .name("output") .nOut(1) .activation(Activation.TANH) - .build()); + .build()) - MultiLayerConfiguration conf = listBuilder - - - .setInputType(InputType.convolutional(20, 10, 2)) + .inputType(InputType.convolutional(20, 10, 2)) .build(); // For some reason, this model works diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java index 36bfbc95f..c5755753a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java @@ -21,8 +21,6 @@ package org.deeplearning4j.nn.conf.preprocessor; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import 
org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -30,11 +28,6 @@ import org.deeplearning4j.nn.conf.preprocessor.custom.MyCustomPreprocessor; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.lossfunctions.LossFunctions; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.introspect.AnnotatedClass; -import com.fasterxml.jackson.databind.jsontype.NamedType; - -import java.util.Collection; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -44,8 +37,8 @@ public class CustomPreprocessorTest extends BaseDL4JTest { @Test public void testCustomPreprocessor() { //Second: let's create a MultiLayerConfiguration with one, and check JSON and YAML config actually works... - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10) .activation(Activation.SOFTMAX).nOut(10).build()) @@ -57,10 +50,10 @@ public class CustomPreprocessorTest extends BaseDL4JTest { // System.out.println(json); - MultiLayerConfiguration confFromJson = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration confFromJson = NeuralNetConfiguration.fromJson(json); assertEquals(conf, confFromJson); - MultiLayerConfiguration confFromYaml = MultiLayerConfiguration.fromYaml(yaml); + NeuralNetConfiguration confFromYaml = NeuralNetConfiguration.fromYaml(yaml); assertEquals(conf, confFromYaml); assertTrue(confFromJson.getInputPreProcess(0) instanceof MyCustomPreprocessor); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java index 56c6cfb1d..1f279a762 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.conf.preprocessor; import lombok.val; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; @@ -58,14 +57,14 @@ public class TestPreProcessors extends BaseDL4JTest { int timeSeriesLength = timeSeriesLengths[x]; RnnToFeedForwardPreProcessor proc = new RnnToFeedForwardPreProcessor(); - NeuralNetConfiguration nnc = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration nnc = NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(layerSize) .nOut(layerSize).build()) .build(); - long numParams = nnc.getLayer().initializer().numParams(nnc); + long numParams = nnc.getFirstLayer().initializer().numParams(nnc); INDArray params = Nd4j.create(1, numParams); - DenseLayer layer = (DenseLayer) nnc.getLayer().instantiate(nnc, null, 0, params, true, params.dataType()); + DenseLayer layer = (DenseLayer) 
nnc.getFirstLayer().instantiate(nnc, null, 0, params, true, params.dataType()); layer.setInputMiniBatchSize(miniBatchSize); INDArray activations3dc = Nd4j.create(new int[] {miniBatchSize, layerSize, timeSeriesLength}, 'c'); @@ -143,14 +142,14 @@ public class TestPreProcessors extends BaseDL4JTest { FeedForwardToRnnPreProcessor proc = new FeedForwardToRnnPreProcessor(); - NeuralNetConfiguration nnc = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration nnc = NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(layerSize) .nOut(layerSize).build()) .build(); - val numParams = nnc.getLayer().initializer().numParams(nnc); + val numParams = nnc.getFirstLayer().initializer().numParams(nnc); INDArray params = Nd4j.create(1, numParams); - DenseLayer layer = (DenseLayer) nnc.getLayer().instantiate(nnc, null, 0, params, true, params.dataType()); + DenseLayer layer = (DenseLayer) nnc.getFirstLayer().instantiate(nnc, null, 0, params, true, params.dataType()); layer.setInputMiniBatchSize(miniBatchSize); INDArray rand = Nd4j.rand(miniBatchSize * timeSeriesLength, layerSize); @@ -227,16 +226,16 @@ public class TestPreProcessors extends BaseDL4JTest { InputPreProcessor proc = new CnnToRnnPreProcessor(inputHeight, inputWidth, nChannels); NeuralNetConfiguration nnc = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( inputWidth, inputHeight).nIn(cnnNChannelsIn) .nOut(nChannels).build()) .build(); - val numParams = nnc.getLayer().initializer().numParams(nnc); + val numParams = nnc.getFirstLayer().initializer().numParams(nnc); INDArray params = Nd4j.create(1, numParams); ConvolutionLayer layer = - (ConvolutionLayer) nnc.getLayer().instantiate(nnc, null, 0, params, true, params.dataType()); + (ConvolutionLayer) nnc.getFirstLayer().instantiate(nnc, null, 0, params, true, params.dataType()); layer.setInputMiniBatchSize(miniBatchSize); INDArray activationsCnn = Nd4j.rand(miniBatchSize * timeSeriesLength, nChannels, @@ -309,16 +308,16 @@ public class TestPreProcessors extends BaseDL4JTest { InputPreProcessor proc = new RnnToCnnPreProcessor(inputHeight, inputWidth, nChannels); NeuralNetConfiguration nnc = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( inputWidth, inputHeight).nIn(cnnNChannelsIn) .nOut(nChannels).build()) .build(); - val numParams = nnc.getLayer().initializer().numParams(nnc); + val numParams = nnc.getFirstLayer().initializer().numParams(nnc); INDArray params = Nd4j.create(1, numParams); ConvolutionLayer layer = - (ConvolutionLayer) nnc.getLayer().instantiate(nnc, null, 0, params, true, params.dataType()); + (ConvolutionLayer) nnc.getFirstLayer().instantiate(nnc, null, 0, params, true, params.dataType()); layer.setInputMiniBatchSize(miniBatchSize); val shape_rnn = new long[] {miniBatchSize, nChannels * inputHeight * inputWidth, @@ -396,8 +395,8 @@ public class TestPreProcessors extends BaseDL4JTest { @Test public void testAutoAdditionOfPreprocessors() { //FF->RNN and RNN->FF - MultiLayerConfiguration conf1 = - new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf1 = + NeuralNetConfiguration.builder() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(5) .nOut(6).build()) .layer(1, new GravesLSTM.Builder().nIn(6).nOut(7).build()) @@ -412,12 +411,12 @@ public class TestPreProcessors extends BaseDL4JTest { //FF-> 
CNN, CNN-> FF, FF->RNN - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder().nOut(10) .kernelSize(5, 5).stride(1, 1).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nOut(6).build()) .layer(2, new RnnOutputLayer.Builder().nIn(6).nOut(5).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); //Expect preprocessors: 0: FF->CNN; 1: CNN->FF; 2: FF->RNN assertEquals(3, conf2.getInputPreProcessors().size()); assertTrue(conf2.getInputPreProcess(0) instanceof FeedForwardToCnnPreProcessor); @@ -425,12 +424,12 @@ public class TestPreProcessors extends BaseDL4JTest { assertTrue(conf2.getInputPreProcess(2) instanceof FeedForwardToRnnPreProcessor); //CNN-> FF, FF->RNN - InputType.convolutional instead of convolutionalFlat - MultiLayerConfiguration conf2a = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf2a = NeuralNetConfiguration.builder() .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder().nOut(10) .kernelSize(5, 5).stride(1, 1).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nOut(6).build()) .layer(2, new RnnOutputLayer.Builder().nIn(6).nOut(5).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(28, 28, 1)).build(); + .inputType(InputType.convolutional(28, 28, 1)).build(); //Expect preprocessors: 1: CNN->FF; 2: FF->RNN assertEquals(2, conf2a.getInputPreProcessors().size()); assertTrue(conf2a.getInputPreProcess(1) instanceof CnnToFeedForwardPreProcessor); @@ -438,12 +437,12 @@ public class TestPreProcessors extends BaseDL4JTest { //FF->CNN and CNN->RNN: - MultiLayerConfiguration conf3 = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf3 = NeuralNetConfiguration.builder().list() .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder().nOut(10) .kernelSize(5, 5).stride(1, 1).build()) .layer(1, new GravesLSTM.Builder().nOut(6).build()) .layer(2, new RnnOutputLayer.Builder().nIn(6).nOut(5).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); //Expect preprocessors: 0: FF->CNN, 1: CNN->RNN; assertEquals(2, conf3.getInputPreProcessors().size()); assertTrue(conf3.getInputPreProcess(0) instanceof FeedForwardToCnnPreProcessor); @@ -452,8 +451,8 @@ public class TestPreProcessors extends BaseDL4JTest { @Test public void testCnnToDense() { - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .list().layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( 4, 4) // 28*28*1 => 15*15*10 @@ -467,7 +466,7 @@ public class TestPreProcessors extends BaseDL4JTest { .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(200) .nOut(5).weightInit(WeightInit.RELU) .activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)) + .inputType(InputType.convolutionalFlat(28, 28, 1)) .build(); assertNotNull(conf.getInputPreProcess(0)); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java index d4bae91a6..4d4b36013 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java @@ -27,7 +27,6 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.iterator.ExistingDataSetIterator; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.BaseLayer; @@ -65,9 +64,9 @@ public class TestWeightNoise extends BaseDL4JTest { }; for (IWeightNoise wn : weightNoises) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .weightNoise(wn) - .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build()) .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) @@ -76,14 +75,14 @@ public class TestWeightNoise extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(wn, ((BaseLayer) net.getLayer(0).conf().getLayer()).getWeightNoise()); - assertEquals(new DropConnect(0.25), ((BaseLayer) net.getLayer(1).conf().getLayer()).getWeightNoise()); - assertEquals(wn, ((BaseLayer) net.getLayer(2).conf().getLayer()).getWeightNoise()); + assertEquals(wn, ((BaseLayer) net.getLayer(0).getLayerConfiguration()).getWeightNoise()); + assertEquals(new DropConnect(0.25), ((BaseLayer) net.getLayer(1).getLayerConfiguration()).getWeightNoise()); + assertEquals(wn, ((BaseLayer) net.getLayer(2).getLayerConfiguration()).getWeightNoise()); TestUtils.testModelSerialization(net); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .weightNoise(wn) .graphBuilder() .addInputs("in") @@ -96,9 +95,9 @@ public class TestWeightNoise extends BaseDL4JTest { ComputationGraph graph = new ComputationGraph(conf2); graph.init(); - assertEquals(wn, ((BaseLayer) graph.getLayer(0).conf().getLayer()).getWeightNoise()); - assertEquals(new DropConnect(0.25), ((BaseLayer) graph.getLayer(1).conf().getLayer()).getWeightNoise()); - assertEquals(wn, ((BaseLayer) graph.getLayer(2).conf().getLayer()).getWeightNoise()); + assertEquals(wn, ((BaseLayer) graph.getLayer(0).getLayerConfiguration()).getWeightNoise()); + assertEquals(new DropConnect(0.25), ((BaseLayer) graph.getLayer(1).getLayerConfiguration()).getWeightNoise()); + assertEquals(wn, ((BaseLayer) graph.getLayer(2).getLayerConfiguration()).getWeightNoise()); TestUtils.testModelSerialization(graph); @@ -144,8 +143,8 @@ public class TestWeightNoise extends BaseDL4JTest { List list = Arrays.asList(wn1, wn2, wn3); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn1).build()) .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn2).build()) .layer(new 
OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).weightNoise(wn3).build()) @@ -168,7 +167,7 @@ public class TestWeightNoise extends BaseDL4JTest { wn3 = new CustomWeightNoise(); list = Arrays.asList(wn1, wn2, wn3); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn1).build(), "in") @@ -247,9 +246,9 @@ public class TestWeightNoise extends BaseDL4JTest { public void testDropConnectValues() { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.ONES) - .list() + .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java index edad9fb7d..9002ba2af 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java @@ -30,7 +30,6 @@ import org.deeplearning4j.common.config.DL4JClassLoading; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.WorkspaceMode; @@ -82,7 +81,7 @@ import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer; import org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM; import org.deeplearning4j.nn.conf.layers.GravesLSTM; import org.deeplearning4j.nn.conf.layers.LSTM; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.LearnedSelfAttentionLayer; import org.deeplearning4j.nn.conf.layers.LocalResponseNormalization; import org.deeplearning4j.nn.conf.layers.LocallyConnected1D; @@ -141,7 +140,6 @@ import org.deeplearning4j.nn.modelimport.keras.preprocessors.ReshapePreprocessor import org.deeplearning4j.nn.modelimport.keras.preprocessors.TensorFlowCnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.nn.weights.WeightInitDistribution; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Disabled; @@ -216,7 +214,7 @@ public class DTypeTests extends BaseDL4JTest { continue; } - if (Layer.class.isAssignableFrom(clazz)) { + if (LayerConfiguration.class.isAssignableFrom(clazz)) { layerClasses.add(clazz); } else if (InputPreProcessor.class.isAssignableFrom(clazz)) { preprocClasses.add(clazz); @@ -258,9 +256,9 @@ public class DTypeTests extends BaseDL4JTest { } public static void logUsedClasses(MultiLayerNetwork net) { - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - for (NeuralNetConfiguration nnc : conf.getConfs()) { - Layer l = nnc.getLayer(); + NeuralNetConfiguration conf = net.getConfiguration(); + for (NeuralNetConfiguration nnc : conf.getNetConfigurations()) { + LayerConfiguration l 
= nnc.getFirstLayer(); seenLayers.add(l.getClass()); if (l instanceof BaseWrapperLayer) { BaseWrapperLayer bwl = (BaseWrapperLayer) l; @@ -283,7 +281,7 @@ public class DTypeTests extends BaseDL4JTest { for (GraphVertex gv : conf.getVertices().values()) { seenVertices.add(gv.getClass()); if (gv instanceof LayerVertex) { - seenLayers.add(((LayerVertex) gv).getLayerConf().getLayer().getClass()); + seenLayers.add(((LayerVertex) gv).getNetConfiguration().getFirstLayer().getClass()); InputPreProcessor ipp = ((LayerVertex) gv).getPreProcessor(); if (ipp != null) { seenPreprocs.add(ipp.getClass()); @@ -301,7 +299,7 @@ public class DTypeTests extends BaseDL4JTest { for (DataType dt : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(dt, dt); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .weightInit(WeightInit.XAVIER) .updater(new Adam(0.01)) @@ -384,7 +382,7 @@ public class DTypeTests extends BaseDL4JTest { for (DataType dt : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(dt, dt); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .weightInit(WeightInit.XAVIER) .updater(new Adam(0.01)) @@ -475,8 +473,8 @@ public class DTypeTests extends BaseDL4JTest { String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", outputLayer=" + outputLayer; - Layer ol; - Layer secondLast; + LayerConfiguration ol; + LayerConfiguration secondLast; switch (outputLayer) { case 0: ol = new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); @@ -503,7 +501,7 @@ public class DTypeTests extends BaseDL4JTest { } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) @@ -531,7 +529,7 @@ public class DTypeTests extends BaseDL4JTest { .layer(new ActivationLayer(Activation.LEAKYRELU)) .layer(secondLast) .layer(ol) - .setInputType(InputType.convolutionalFlat(8, 8, 1)) + .inputType(InputType.convolutionalFlat(8, 8, 1)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -560,7 +558,7 @@ public class DTypeTests extends BaseDL4JTest { assertEquals(networkDtype, out.dataType(), msg); List ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { - String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); + String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? 
"input" : net.getLayer(i - 1).getLayerConfiguration().getClass().getSimpleName()); assertEquals(networkDtype, ff.get(i).dataType(), msg); } @@ -601,8 +599,8 @@ public class DTypeTests extends BaseDL4JTest { String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", outputLayer=" + outputLayer; log.info(msg); - Layer ol; - Layer secondLast; + LayerConfiguration ol; + LayerConfiguration secondLast; switch (outputLayer) { case 0: ol = new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); @@ -621,7 +619,7 @@ public class DTypeTests extends BaseDL4JTest { } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Nesterovs(1e-2, 0.9)) @@ -636,7 +634,7 @@ public class DTypeTests extends BaseDL4JTest { .layer(new Upsampling3D.Builder().size(2).build()) .layer(secondLast) .layer(ol) - .setInputType(InputType.convolutional3D(Convolution3D.DataFormat.NCDHW, 8, 8, 8, 1)) + .inputType(InputType.convolutional3D(Convolution3D.DataFormat.NCDHW, 8, 8, 8, 1)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -664,7 +662,7 @@ public class DTypeTests extends BaseDL4JTest { assertEquals(networkDtype, out.dataType(), msg); List ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { - String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); + String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).getLayerConfiguration().getClass().getSimpleName()); assertEquals(networkDtype, ff.get(i).dataType(), s); } @@ -712,8 +710,8 @@ public class DTypeTests extends BaseDL4JTest { String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", outputLayer=" + outputLayer + " at index " + outputLayer; - Layer ol; - Layer secondLast; + LayerConfiguration ol; + LayerConfiguration secondLast; switch (outputLayer) { case 0: ol = new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); @@ -732,7 +730,7 @@ public class DTypeTests extends BaseDL4JTest { } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .trainingWorkspaceMode(WorkspaceMode.NONE) .inferenceWorkspaceMode(WorkspaceMode.NONE) .dataType(networkDtype) @@ -749,7 +747,7 @@ public class DTypeTests extends BaseDL4JTest { .layer(new Upsampling1D.Builder(2).build()) .layer(secondLast) .layer(ol) - .setInputType(InputType.recurrent(5, 10,RNNFormat.NCW)) + .inputType(InputType.recurrent(5, 10,RNNFormat.NCW)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -774,7 +772,7 @@ public class DTypeTests extends BaseDL4JTest { assertEquals(networkDtype, out.dataType(), msg); List ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { - String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); + String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? 
"input" : net.getLayer(i - 1).getLayerConfiguration().getClass().getSimpleName()); assertEquals(networkDtype, ff.get(i).dataType(), s); } @@ -814,7 +812,7 @@ public class DTypeTests extends BaseDL4JTest { String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) @@ -822,7 +820,7 @@ public class DTypeTests extends BaseDL4JTest { .layer(new SpaceToBatchLayer.Builder().blocks(1, 1).build()) .layer(new SpaceToDepthLayer.Builder().blocks(2).build()) .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.convolutional(28, 28, 5)) + .inputType(InputType.convolutional(28, 28, 5)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -840,7 +838,7 @@ public class DTypeTests extends BaseDL4JTest { assertEquals(networkDtype, out.dataType(), msg); List ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { - String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); + String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).getLayerConfiguration().getClass().getSimpleName()); assertEquals(networkDtype, ff.get(i).dataType(), s); } @@ -878,8 +876,8 @@ public class DTypeTests extends BaseDL4JTest { String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", outputLayer=" + outputLayer; - Layer ol; - Layer secondLast; + LayerConfiguration ol; + LayerConfiguration secondLast; switch (outputLayer) { case 0: ol = new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); @@ -897,7 +895,7 @@ public class DTypeTests extends BaseDL4JTest { throw new RuntimeException(); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) @@ -982,12 +980,12 @@ public class DTypeTests extends BaseDL4JTest { int width = 6; int inputDepth = 4; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(networkDtype) .seed(123) .updater(new NoOp()) - .weightInit(new WeightInitDistribution(new UniformDistribution(-6, 6))) - .list() + .dist(new UniformDistribution(-6, 6)) + .layer(new PrimaryCapsules.Builder(primaryCapsDim, primarpCapsChannel) .kernelSize(3, 3) .stride(2, 2) @@ -996,7 +994,7 @@ public class DTypeTests extends BaseDL4JTest { .layer(new CapsuleStrengthLayer.Builder().build()) .layer(new ActivationLayer.Builder(new ActivationSoftmax()).build()) .layer(new LossLayer.Builder(new LossNegativeLogLikelihood()).build()) - .setInputType(InputType.convolutional(height, width, inputDepth)) + .inputType(InputType.convolutional(height, width, inputDepth)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -1013,7 +1011,7 @@ public class DTypeTests extends BaseDL4JTest { assertEquals(networkDtype, out.dataType(), msg); List ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { - String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? 
"input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); + String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).getLayerConfiguration().getClass().getSimpleName()); assertEquals(networkDtype, ff.get(i).dataType(), s); } @@ -1052,11 +1050,11 @@ public class DTypeTests extends BaseDL4JTest { String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", test=" + test; - ComputationGraphConfiguration.GraphBuilder conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration.GraphBuilder conf = NeuralNetConfiguration.builder() .dataType(networkDtype) .seed(123) .updater(new NoOp()) - .weightInit(new WeightInitDistribution(new UniformDistribution(-6, 6))) + .dist(new UniformDistribution(-6, 6)) .graphBuilder() .addInputs("in") .setOutputs("out"); @@ -1144,7 +1142,7 @@ public class DTypeTests extends BaseDL4JTest { for (int test = 0; test < 8; test++) { String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", test=" + test; - ComputationGraphConfiguration.GraphBuilder b = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration.GraphBuilder b = NeuralNetConfiguration.builder() .dataType(networkDtype) .seed(123) .updater(new NoOp()) @@ -1301,7 +1299,7 @@ public class DTypeTests extends BaseDL4JTest { for (int test = 0; test < 2; test++) { String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", test=" + test; - ComputationGraphConfiguration.GraphBuilder b = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration.GraphBuilder b = NeuralNetConfiguration.builder() .dataType(networkDtype) .seed(123) .updater(new NoOp()) @@ -1395,7 +1393,7 @@ public class DTypeTests extends BaseDL4JTest { INDArray in = Nd4j.rand(networkDtype, new long[]{mb, nIn, tsLength}); INDArray labels = TestUtils.randomOneHot(mb, nOut); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(networkDtype) .activation(Activation.TANH) .updater(new NoOp()) @@ -1408,7 +1406,7 @@ public class DTypeTests extends BaseDL4JTest { .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build()) .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.recurrent(nIn)) + .inputType(InputType.recurrent(nIn)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -1418,7 +1416,7 @@ public class DTypeTests extends BaseDL4JTest { assertEquals( networkDtype, out.dataType(), msg); List ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { - String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); + String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? 
"input" : net.getLayer(i - 1).getLayerConfiguration().getClass().getSimpleName()); assertEquals(networkDtype, ff.get(i).dataType(), s); } @@ -1482,7 +1480,7 @@ public class DTypeTests extends BaseDL4JTest { System.out.println("Starting test: " + name); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .dataType(networkDtype) .activation(Activation.TANH) .updater(new NoOp()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java index 2d2379fdb..de8c16075 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java @@ -66,7 +66,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { int timeSeriesLength = 12; //4 layer network: 2 GravesLSTM + DenseLayerConfiguration + RnnOutputLayer. Hence also tests preprocessors. - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7) .activation(Activation.TANH) @@ -156,7 +156,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); int timeSeriesLength = 6; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(), "in") @@ -211,7 +211,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { //4 layer network: 2 GravesLSTM + DenseLayerConfiguration + RnnOutputLayer. Hence also tests preprocessors. 
//Network architecture: lstm0 -> Dense -> RnnOutputLayer0 // and lstm1 -> Dense -> RnnOutputLayer1 - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in0", "in1") .addLayer("lstm0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(6) @@ -340,7 +340,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { int nIn = 5; int nOut = 4; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .trainingWorkspaceMode(WorkspaceMode.NONE).inferenceWorkspaceMode(WorkspaceMode.NONE) .graphBuilder() .addInputs("in") @@ -360,7 +360,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { .setOutputs("out").build(); assertEquals(BackpropType.Standard, conf.getBackpropType()); - ComputationGraphConfiguration confTBPTT = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration confTBPTT = NeuralNetConfiguration.builder().seed(12345) .trainingWorkspaceMode(WorkspaceMode.NONE).inferenceWorkspaceMode(WorkspaceMode.NONE) .graphBuilder() .addInputs("in") @@ -377,7 +377,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { .activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)).build(), "1") .setOutputs("out").backpropType(BackpropType.TruncatedBPTT) - .tBPTTForwardLength(timeSeriesLength).tBPTTBackwardLength(timeSeriesLength) + .tbpttFwdLength(timeSeriesLength).tbpttBackLength(timeSeriesLength) .setInputTypes(InputType.recurrent(nIn,timeSeriesLength,RNNFormat.NCW)) .build(); assertEquals(BackpropType.TruncatedBPTT, confTBPTT.getBackpropType()); @@ -456,7 +456,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { int nTimeSlices = 20; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) @@ -473,7 +473,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { .dist(new NormalDistribution(0, 0.5)).build(), "1") .setOutputs("out").backpropType(BackpropType.TruncatedBPTT) .setInputTypes(InputType.recurrent(nIn,timeSeriesLength,RNNFormat.NCW)) - .tBPTTBackwardLength(timeSeriesLength).tBPTTForwardLength(timeSeriesLength).build(); + .tbpttBackLength(timeSeriesLength).tbpttFwdLength(timeSeriesLength).build(); Nd4j.getRandom().setSeed(12345); ComputationGraph graph = new ComputationGraph(conf); @@ -493,7 +493,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { int nIn = 5; int nOut = 4; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) @@ -509,7 +509,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { .activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)).build(), "1") .setOutputs("out").backpropType(BackpropType.TruncatedBPTT) - 
.tBPTTBackwardLength(tbpttLength).tBPTTForwardLength(tbpttLength) + .tbpttBackLength(tbpttLength).tbpttFwdLength(tbpttLength) .setInputTypes(InputType.recurrent(nIn,timeSeriesLength, RNNFormat.NCW)) .build(); @@ -530,13 +530,13 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { @Test public void testTbpttMasking() { //Simple "does it throw an exception" type test... - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .graphBuilder().addInputs("in") .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY).nIn(1).nOut(1).build(), "in") - .setOutputs("out").backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(8) + .setOutputs("out").backpropType(BackpropType.TruncatedBPTT).tbpttFwdLength(8) .setInputTypes(InputType.recurrent(1,1,RNNFormat.NCW)) - .tBPTTBackwardLength(8).build(); + .tbpttBackLength(8).build(); ComputationGraph net = new ComputationGraph(conf); net.init(); @@ -553,12 +553,12 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { public void checkMaskArrayClearance() { for (boolean tbptt : new boolean[] {true, false}) { //Simple "does it throw an exception" type test... - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .graphBuilder().addInputs("in") .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY).nIn(1).nOut(1).build(), "in") .setOutputs("out").backpropType(tbptt ? BackpropType.TruncatedBPTT : BackpropType.Standard) - .tBPTTForwardLength(8).tBPTTBackwardLength(8).build(); + .tbpttFwdLength(8).tbpttBackLength(8).build(); ComputationGraph net = new ComputationGraph(conf); net.init(); @@ -616,7 +616,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { int nHiddenUnits = 17; try { - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("0", new org.deeplearning4j.nn.conf.layers.LSTM.Builder().nIn(nIn).nOut(nHiddenUnits).build(), "in") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java index 95691fed6..d83f4ac17 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java @@ -57,7 +57,7 @@ public class TestCompGraphCNN extends BaseDL4JTest { protected static ComputationGraphConfiguration getMultiInputGraphConfig() { ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("input") .setInputTypes(InputType.convolutional(32, 32, 3)) @@ -154,7 +154,7 @@ public class TestCompGraphCNN extends BaseDL4JTest { DataSet trainInput; ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .seed(123).graphBuilder().addInputs("input") .setInputTypes(InputType.convolutional(nChannels, imageWidth, diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java index 794538c36..f4da77575 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java @@ -24,7 +24,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.EarlyTerminationDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.layers.variational.BernoulliReconstructionDistribution; @@ -60,7 +59,7 @@ public class TestCompGraphUnsupervised extends BaseDL4JTest { for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.NONE, WorkspaceMode.ENABLED}) { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .updater(new Adam(1e-3)) .weightInit(WeightInit.XAVIER) @@ -136,13 +135,13 @@ public class TestCompGraphUnsupervised extends BaseDL4JTest { for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.NONE, WorkspaceMode.ENABLED}) { - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .seed(12345) .updater(new Adam(1e-3)) .weightInit(WeightInit.XAVIER) .inferenceWorkspaceMode(wsm) .trainingWorkspaceMode(wsm) - .list() + .layer(new VariationalAutoencoder.Builder() .nIn(784) .nOut(32) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java index a6373c6a9..adf347260 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java @@ -98,7 +98,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { public File testDir; private static ComputationGraphConfiguration getIrisGraphConfiguration() { - return new NeuralNetConfiguration.Builder().seed(12345) + return NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input") .addLayer("firstLayer", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input") @@ -106,9 +106,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .setOutputs("outputLayer").build(); } - private static MultiLayerConfiguration getIrisMLNConfiguration() { - return new NeuralNetConfiguration.Builder().seed(12345) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() + private static NeuralNetConfiguration getIrisMLNConfiguration() { + return NeuralNetConfiguration.builder().seed(12345) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).build()) .layer(1, new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build()).build(); } @@ -150,7 +150,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraph graph = new ComputationGraph(configuration); graph.init(); - 
MultiLayerConfiguration mlc = getIrisMLNConfiguration(); + NeuralNetConfiguration mlc = getIrisMLNConfiguration(); MultiLayerNetwork net = new MultiLayerNetwork(mlc); net.init(); @@ -209,7 +209,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraph graph = new ComputationGraph(configuration); graph.init(); - MultiLayerConfiguration mlc = getIrisMLNConfiguration(); + NeuralNetConfiguration mlc = getIrisMLNConfiguration(); MultiLayerNetwork net = new MultiLayerNetwork(mlc); net.init(); @@ -244,7 +244,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraph graph = new ComputationGraph(configuration); graph.init(); - MultiLayerConfiguration mlc = getIrisMLNConfiguration(); + NeuralNetConfiguration mlc = getIrisMLNConfiguration(); MultiLayerNetwork net = new MultiLayerNetwork(mlc); net.init(); @@ -295,7 +295,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraph graph = new ComputationGraph(configuration); graph.init(); - MultiLayerConfiguration mlnConfig = getIrisMLNConfiguration(); + NeuralNetConfiguration mlnConfig = getIrisMLNConfiguration(); MultiLayerNetwork net = new MultiLayerNetwork(mlnConfig); net.init(); @@ -332,7 +332,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { MultiDataSetIterator iter = new RecordReaderMultiDataSetIterator.Builder(10).addReader("iris", rr) .addInput("iris", 0, 3).addOutputOneHot("iris", 4, 3).build(); - ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .updater(new Sgd(0.1)) .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", @@ -377,7 +377,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraph graph = new ComputationGraph(configuration); graph.init(); - MultiLayerConfiguration mlc = getIrisMLNConfiguration(); + NeuralNetConfiguration mlc = getIrisMLNConfiguration(); MultiLayerNetwork net = new MultiLayerNetwork(mlc); net.init(); @@ -401,14 +401,14 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { public void testPreprocessorAddition() { //Also check that nIns are set automatically //First: check FF -> RNN - ComputationGraphConfiguration conf1 = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.feedForward(5)) .addLayer("rnn", new GravesLSTM.Builder().nOut(5).build(), "in") .addLayer("out", new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).build(), "rnn").setOutputs("out").build(); - assertEquals(5, ((FeedForwardLayer) ((LayerVertex) conf1.getVertices().get("rnn")).getLayerConf().getLayer()) + assertEquals(5, ((FeedForwardLayer) ((LayerVertex) conf1.getVertices().get("rnn")).getNetConfiguration().getFirstLayer()) .getNIn()); - assertEquals(5, ((FeedForwardLayer) ((LayerVertex) conf1.getVertices().get("out")).getLayerConf().getLayer()) + assertEquals(5, ((FeedForwardLayer) ((LayerVertex) conf1.getVertices().get("out")).getNetConfiguration().getFirstLayer()) .getNIn()); LayerVertex lv1 = (LayerVertex) conf1.getVertices().get("rnn"); @@ -417,15 +417,15 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { assertNull(lv2.getPreProcessor()); //Check RNN -> FF -> RNN - ComputationGraphConfiguration conf2 = new 
NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.recurrent(5)) .addLayer("ff", new DenseLayer.Builder().nOut(5).build(), "in") .addLayer("out", new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).build(), "ff") .setOutputs("out").build(); - assertEquals(5, ((FeedForwardLayer) ((LayerVertex) conf2.getVertices().get("ff")).getLayerConf().getLayer()) + assertEquals(5, ((FeedForwardLayer) ((LayerVertex) conf2.getVertices().get("ff")).getNetConfiguration().getFirstLayer()) .getNIn()); - assertEquals(5, ((FeedForwardLayer) ((LayerVertex) conf2.getVertices().get("out")).getLayerConf().getLayer()) + assertEquals(5, ((FeedForwardLayer) ((LayerVertex) conf2.getVertices().get("out")).getNetConfiguration().getFirstLayer()) .getNIn()); lv1 = (LayerVertex) conf2.getVertices().get("ff"); @@ -434,7 +434,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { assertTrue(lv2.getPreProcessor() instanceof FeedForwardToRnnPreProcessor); //CNN -> Dense - ComputationGraphConfiguration conf3 = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf3 = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.convolutional(28, 28, 1)) .addLayer("cnn", new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0).stride(2, 2) .nOut(3).build(), "in") //(28-2+0)/2+1 = 14 @@ -460,11 +460,11 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { LayerVertex lv4 = (LayerVertex) conf3.getVertices().get("out"); assertNull(lv4.getPreProcessor()); //Check nIns: - assertEquals(7 * 7 * 3, ((FeedForwardLayer) lv3.getLayerConf().getLayer()).getNIn()); + assertEquals(7 * 7 * 3, ((FeedForwardLayer) lv3.getNetConfiguration().getFirstLayer()).getNIn()); //CNN->Dense, RNN->Dense, Dense->RNN ComputationGraphConfiguration conf4 = - new NeuralNetConfiguration.Builder().graphBuilder().addInputs("inCNN", "inRNN") + NeuralNetConfiguration.builder().graphBuilder().addInputs("inCNN", "inRNN") .setInputTypes(InputType.convolutional(28, 28, 1), InputType.recurrent(5)) .addLayer("cnn", new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0) .stride(2, 2).nOut(3).build(), "inCNN") //(28-2+0)/2+1 = 14 @@ -495,14 +495,14 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { LayerVertex lv5 = (LayerVertex) conf4.getVertices().get("out"); assertTrue(lv5.getPreProcessor() instanceof FeedForwardToRnnPreProcessor); //Check nIns: - assertEquals(7 * 7 * 3, ((FeedForwardLayer) lv3.getLayerConf().getLayer()).getNIn()); - assertEquals(5, ((FeedForwardLayer) lv4.getLayerConf().getLayer()).getNIn()); - assertEquals(20, ((FeedForwardLayer) lv5.getLayerConf().getLayer()).getNIn()); //10+10 out of the merge vertex -> 20 in to output layer vertex + assertEquals(7 * 7 * 3, ((FeedForwardLayer) lv3.getNetConfiguration().getFirstLayer()).getNIn()); + assertEquals(5, ((FeedForwardLayer) lv4.getNetConfiguration().getFirstLayer()).getNIn()); + assertEquals(20, ((FeedForwardLayer) lv5.getNetConfiguration().getFirstLayer()).getNIn()); //10+10 out of the merge vertex -> 20 in to output layer vertex //Input to 2 CNN layers: ComputationGraphConfiguration conf5 = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("input") .setInputTypes(InputType.convolutional(28, 28, 1)) @@ 
-575,7 +575,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { public void testCompGraphUnderscores() { //Problem: underscores in names could be problematic for ComputationGraphUpdater, HistogramIterationListener - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input") .addLayer("first_layer", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input") @@ -594,7 +594,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testPreTraining() { ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-6)) .l2(2e-4).graphBuilder().addInputs("in") @@ -648,7 +648,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { int nIn = 5; int nOut = 6; ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345).l1(0.01).l2(0.01) + NeuralNetConfiguration.builder().seed(12345).l1(0.01).l2(0.01) .updater(new Sgd(0.1)) .activation(Activation.TANH).weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") @@ -660,7 +660,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .setOutputs("2").build(); ComputationGraphConfiguration confNoReg = - new NeuralNetConfiguration.Builder().seed(12345).updater(new Sgd(0.1)).activation(Activation.TANH) + NeuralNetConfiguration.builder().seed(12345).updater(new Sgd(0.1)).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(20).build(), "in") .addLayer("1", new DenseLayer.Builder().nIn(20).nOut(30).build(), "0") @@ -717,7 +717,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { INDArray outData = Nd4j.rand(3, 10); Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration standard = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + ComputationGraphConfiguration standard = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .trainingWorkspaceMode(ws).inferenceWorkspaceMode(ws) .seed(12345).graphBuilder().addInputs("in") .addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") @@ -729,7 +729,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration external = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + ComputationGraphConfiguration external = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .trainingWorkspaceMode(ws).inferenceWorkspaceMode(ws) .seed(12345).graphBuilder().addInputs("in") .addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("l0") @@ -771,7 +771,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { for(WorkspaceMode ws : WorkspaceMode.values()) { // System.out.println("***** WORKSPACE: " + ws); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(0.01)) .trainingWorkspaceMode(ws) .inferenceWorkspaceMode(ws) @@ -819,7 +819,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { int nIn = 2; int nOut = 4; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = 
NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(4).activation(Activation.RELU).build(), "in") @@ -857,7 +857,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { expectedGradient.setGradientFor("output_W", Nd4j.ones(5, 3)); expectedGradient.setGradientFor("output_b", Nd4j.ones(1, 3)); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input").addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input") .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "first") @@ -893,7 +893,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { public void testCnnFlatInputType1() { //First: check conv input type. Expect: no preprocessor, nIn set appropriately - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.convolutional(10, 8, 3)) .addLayer("layer", new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0).stride(1, 1) @@ -903,14 +903,14 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .build(); LayerVertex lv = (LayerVertex) conf.getVertices().get("layer"); - FeedForwardLayer l = ((FeedForwardLayer) (lv).getLayerConf().getLayer()); + FeedForwardLayer l = ((FeedForwardLayer) (lv).getNetConfiguration().getFirstLayer()); assertEquals(3, l.getNIn()); assertNull(lv.getPreProcessor()); //Check the equivalent config, but with flat conv data input instead //In this case, the only difference should be the addition of a preprocessor //First: check conv input type. 
Expect: no preprocessor, nIn set appropriately - conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.convolutionalFlat(10, 8, 3)) .addLayer("layer", new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0).stride(1, 1) @@ -920,7 +920,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .build(); lv = (LayerVertex) conf.getVertices().get("layer"); - l = ((FeedForwardLayer) (lv).getLayerConf().getLayer()); + l = ((FeedForwardLayer) (lv).getNetConfiguration().getFirstLayer()); assertEquals(3, l.getNIn()); assertNotNull(lv.getPreProcessor()); InputPreProcessor preProcessor = lv.getPreProcessor(); @@ -932,7 +932,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //Finally, check configuration with a subsampling layer - conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.convolutionalFlat(10, 8, 3)) .addLayer("l0", new SubsamplingLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0) .build(), "in") @@ -945,7 +945,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //Check subsampling layer: lv = (LayerVertex) conf.getVertices().get("l0"); - SubsamplingLayer sl = ((SubsamplingLayer) (lv).getLayerConf().getLayer()); + SubsamplingLayer sl = ((SubsamplingLayer) (lv).getNetConfiguration().getFirstLayer()); assertNotNull(lv.getPreProcessor()); preProcessor = lv.getPreProcessor(); assertTrue(preProcessor instanceof FeedForwardToCnnPreProcessor); @@ -955,7 +955,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { assertEquals(3, preproc.getNumChannels()); //Check dense layer lv = (LayerVertex) conf.getVertices().get("layer"); - l = ((FeedForwardLayer) (lv).getLayerConf().getLayer()); + l = ((FeedForwardLayer) (lv).getNetConfiguration().getFirstLayer()); assertEquals(3, l.getNIn()); assertNull(lv.getPreProcessor()); @@ -970,7 +970,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { graph.init(); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration mlnConfig = getIrisMLNConfiguration(); + NeuralNetConfiguration mlnConfig = getIrisMLNConfiguration(); MultiLayerNetwork net = new MultiLayerNetwork(mlnConfig); net.init(); @@ -999,7 +999,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { for (OptimizationAlgorithm oa : oas) { // System.out.println(oa); ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().optimizationAlgo(oa).graphBuilder() + NeuralNetConfiguration.builder().optimizationAlgo(oa).graphBuilder() .addInputs("input") .addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input") .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), @@ -1016,7 +1016,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testIterationCountAndPersistence() throws IOException { Nd4j.getRandom().setSeed(123); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) @@ -1054,7 +1054,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void 
printSummary() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight") @@ -1095,7 +1095,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testFeedForwardIncludeNonLayerVertices() { - ComputationGraphConfiguration c = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration c = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(5).nOut(5).build(), "in") .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(5).build(), "in") .addVertex("merge", new MergeVertex(), "0", "1") @@ -1123,7 +1123,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //Users generally shouldn't do this, but multiple setOutputs calls should *replace* not *add* outputs - ComputationGraphConfiguration c = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration c = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("out", new OutputLayer.Builder().nIn(10).nOut(5).activation(Activation.SOFTMAX).build(), "in").setOutputs("out") .setOutputs("out").build(); @@ -1135,7 +1135,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { public void testDropoutValidation() { //At one point: this threw an exception due to incorrect validation for (boolean dropConnect : new boolean[]{false, true}) { - new NeuralNetConfiguration.Builder().weightNoise(new DropConnect(0.5)) + NeuralNetConfiguration.builder().weightNoise(new DropConnect(0.5)) .graphBuilder().setInputTypes(InputType.feedForward(1)).addInputs("input1") .addLayer("output", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(1).nOut(1) @@ -1151,7 +1151,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //Don't care about this being valid ComputationGraphConfiguration c = - new NeuralNetConfiguration.Builder().l1(0.5).l2(0.6).graphBuilder() + NeuralNetConfiguration.builder().l1(0.5).l2(0.6).graphBuilder() .addInputs("in") .addLayer("sub1", new SubsamplingLayer.Builder(2, 2).build(), "in") .addLayer("sub2", new Subsampling1DLayer.Builder(2).build(), "sub1") @@ -1178,7 +1178,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testErrorNoOutputLayer() { - ComputationGraphConfiguration c = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration c = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("dense") .build(); @@ -1202,7 +1202,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //When a vertex supports only one input, and gets multiple inputs - we should automatically add a merge //vertex - NeuralNetConfiguration nnc = new NeuralNetConfiguration(); + NeuralNetConfiguration nnc = NeuralNetConfiguration.builder().build(); nnc.setLayer(new DenseLayer.Builder().build()); GraphVertex[] singleInputVertices = new GraphVertex[]{new L2NormalizeVertex(), new LayerVertex(nnc, null), new PoolHelperVertex(), new PreprocessorVertex(), new ReshapeVertex(1, 1), @@ -1210,7 +1210,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { 
new DuplicateToTimeSeriesVertex("in1"), new LastTimeStepVertex("in1")}; for (GraphVertex gv : singleInputVertices) { - ComputationGraphConfiguration c = new NeuralNetConfiguration.Builder().graphBuilder() + ComputationGraphConfiguration c = NeuralNetConfiguration.builder().graphBuilder() .addInputs("in1", "in2").addVertex("gv", gv, "in1", "in2").setOutputs("gv").build(); boolean foundMerge = false; @@ -1238,7 +1238,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { int depth = 3; INDArray img = Nd4j.ones(minibatch, depth, height, width); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("input") .addLayer("L1", new ConvolutionLayer.Builder(new int[]{1, 1}, new int[]{1, 1}, new int[]{0, 0}).nIn(depth).nOut(depth) @@ -1262,7 +1262,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testEpochCounter() throws Exception { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build(), "in") @@ -1302,7 +1302,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { int V_HEIGHT = 130; int V_NFRAMES = 150; ComputationGraphConfiguration confForArchitecture = - new NeuralNetConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers + NeuralNetConfiguration.builder().seed(12345).l2(0.001) //l2 regularization on all layers .updater(new AdaGrad(0.4)).graphBuilder() .addInputs("in") .addLayer("layer0", new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB @@ -1331,7 +1331,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .inputPreProcessor("layer3", new CnnToFeedForwardPreProcessor(7, 7, 10)) .inputPreProcessor("layer4", new FeedForwardToRnnPreProcessor()) .backpropType(BackpropType.TruncatedBPTT) - .tBPTTForwardLength(V_NFRAMES / 5).tBPTTBackwardLength(V_NFRAMES / 5).build(); + .tbpttFwdLength(V_NFRAMES / 5).tbpttBackLength(V_NFRAMES / 5).build(); ComputationGraph modelExpectedArch = new ComputationGraph(confForArchitecture); modelExpectedArch.init(); ComputationGraph modelMow = new TransferLearning.GraphBuilder(modelExpectedArch).setFeatureExtractor("layer2").build(); @@ -1347,7 +1347,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { public void testInputClearance() throws Exception { //Activations should be cleared - if not, it's possible for out of (workspace) scope arrays to be around // which can cause a crash - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .convolutionMode(ConvolutionMode.Same) .graphBuilder() .addInputs("in") @@ -1383,7 +1383,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { for(boolean allowDisconnected : new boolean[]{false, true}) { try { - ComputationGraphConfiguration.GraphBuilder b = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration.GraphBuilder b = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .addLayer("0", new DenseLayer.Builder().activation(Activation.SIGMOID).nOut(8).build(), "in") @@ -1414,7 +1414,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testLayerSize(){ - ComputationGraphConfiguration conf = new 
NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") @@ -1436,7 +1436,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { assertEquals(3, net.layerInputSize(0)); assertEquals(0, net.layerInputSize(1)); - assertEquals(((FeedForwardLayer)net.getLayer(2).conf().getLayer()).getNIn(), net.layerInputSize(2)); + assertEquals(((FeedForwardLayer)net.getLayer(2).getLayerConfiguration()).getNIn(), net.layerInputSize(2)); assertEquals(30, net.layerInputSize(3)); assertEquals(6, net.layerSize("0")); @@ -1446,14 +1446,14 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { assertEquals(3, net.layerInputSize("0")); assertEquals(0, net.layerInputSize("1")); - assertEquals(((FeedForwardLayer)net.getLayer(2).conf().getLayer()).getNIn(), net.layerInputSize("2")); + assertEquals(((FeedForwardLayer)net.getLayer(2).getLayerConfiguration()).getNIn(), net.layerInputSize("2")); assertEquals(30, net.layerInputSize("3")); } @Test public void testZeroParamNet() throws Exception { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("0", new SubsamplingLayer.Builder().kernelSize(2,2).stride(2,2).build(), "in") @@ -1494,7 +1494,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { final String inputName = "input"; final String outputName = "output"; final String scaleName = "scale"; - final ComputationGraph graph = new ComputationGraph(new NeuralNetConfiguration.Builder() + final ComputationGraph graph = new ComputationGraph(NeuralNetConfiguration.builder() //.inferenceWorkspaceMode(WorkspaceMode.NONE) .graphBuilder() .addInputs(inputName) @@ -1535,7 +1535,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { DataSet all = new IrisDataSetIterator(150,150).next(); DataSetIterator iter = new IrisDataSetIterator(5,150); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .graphBuilder() .addInputs("in") @@ -1558,7 +1558,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //Test for a simple net: - ComputationGraphConfiguration.GraphBuilder builder = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration.GraphBuilder builder = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in1", "in2") .layer("0", new DenseLayer.Builder().nOut(10).build(), "in1") @@ -1595,7 +1595,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testTopoSortSaving(){ - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in1", "in2") .addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in1") @@ -1694,7 +1694,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //The fit methods should *not* do layerwise pretraining: - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") @@ -1742,7 +1742,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testAllowInputModification(){ - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = 
NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in1", "in2") @@ -1781,7 +1781,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testCompGraphDropoutOutputLayers(){ //https://github.com/deeplearning4j/deeplearning4j/issues/6326 - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .dropOut(0.8) .graphBuilder() .addInputs("in1", "in2") @@ -1819,7 +1819,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testCompGraphDropoutOutputLayers2() { //https://github.com/deeplearning4j/deeplearning4j/issues/6326 - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .dropOut(0.8) .graphBuilder() .addInputs("in1", "in2") @@ -1854,7 +1854,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testAddRemoveVertex() { - new NeuralNetConfiguration.Builder().graphBuilder() + NeuralNetConfiguration.builder().graphBuilder() .addVertex("toRemove", new ScaleVertex(0), "don't care") .addVertex("test", new ScaleVertex(0), "toRemove") .removeVertex("toRemove", true); @@ -1864,7 +1864,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testGetSetParamUnderscores(){ //Test get/set param with underscores in layer nome - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("layer_zero", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") @@ -1890,7 +1890,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testOutputSpecificLayers(){ - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .graphBuilder() .addInputs("in") @@ -1918,7 +1918,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void singleInputElemVertex() { final InputType inputType = InputType.convolutional(10, 10, 2); - final ComputationGraph graph = new ComputationGraph(new NeuralNetConfiguration.Builder() + final ComputationGraph graph = new ComputationGraph(NeuralNetConfiguration.builder() .graphBuilder() .setInputTypes(inputType) .addInputs("input") @@ -1935,7 +1935,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testCloneDropoutIndependence(){ - val modelConf = new NeuralNetConfiguration.Builder() + val modelConf = NeuralNetConfiguration.builder() .updater(new Adam(0.01)) .weightInit(WeightInit.XAVIER_UNIFORM) .biasInit(0) @@ -1968,8 +1968,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraph cg2 = model.clone(); - IDropout d1 = model.getLayer(0).conf().getLayer().getIDropout(); - IDropout d2 = cg2.getLayer(0).conf().getLayer().getIDropout(); + IDropout d1 = model.getLayer(0).getLayerConfiguration().getIDropout(); + IDropout d2 = cg2.getLayer(0).getLayerConfiguration().getIDropout(); assertNotSame(d1, d2); //Should not be same object! 
assertEquals(d1, d2); //But should be equal @@ -1982,7 +1982,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { int hiddenSize = 100; int dataSize = 10; int seqLen = 5; - ComputationGraphConfiguration configuration = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration configuration = NeuralNetConfiguration.builder() .updater(new Adam()) .graphBuilder() .addInputs("x_emb") @@ -2021,7 +2021,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //https://github.com/deeplearning4j/deeplearning4j/issues/6809#issuecomment-463892644 double lr = 1e-3; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .weightInit(WeightInit.XAVIER) .updater(new Adam(lr)) @@ -2121,7 +2121,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { int outputSize = 6; int layerSize = 3; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .seed(12345) .weightInit(WeightInit.XAVIER) @@ -2152,7 +2152,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testConv3dMergeVertex(){ - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addLayer("l0", new Convolution3D.Builder().kernelSize(2,2,2).stride(1,1,1).nIn(3).nOut(3).dataFormat(Convolution3D.DataFormat.NCDHW).build(), "in") @@ -2172,7 +2172,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testDualEmbedding(){ - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .addLayer("e1", new EmbeddingLayer.Builder().nIn(10).nOut(5).build(), "in") @@ -2191,7 +2191,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { @Test public void testMergeNchw() throws Exception { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .convolutionMode(ConvolutionMode.Same) .graphBuilder() .addInputs("in") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java index 0c17238db..ce8019133 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java @@ -42,7 +42,7 @@ public class TestSetGetParameters extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); //Create configuration. 
Doesn't matter if this doesn't actually work for forward/backward pass here - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in").addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") .addLayer("1", new GravesLSTM.Builder().nIn(10).nOut(10).build(), "in") .addLayer("2", new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).build(), "in") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java index 96e1dcf12..237e7550e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java @@ -68,7 +68,7 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { for (int nExamples : miniBatchSizes) { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).graphBuilder().addInputs("in") .addLayer("0", new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), @@ -158,7 +158,7 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { for (int nExamples : miniBatchSizes) { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(new NormalDistribution(0,2)) .updater(new Sgd(0.1)).seed(12345).graphBuilder().addInputs("in") @@ -300,7 +300,7 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { INDArray labels = Nd4j.ones(miniBatch, nOut, tsLength); ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration.builder().seed(12345L) .graphBuilder() .addInputs("in").addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5) @@ -370,7 +370,7 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { INDArray input = Nd4j.rand(miniBatch, nIn, tsLength); ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration.builder().seed(12345L) .graphBuilder() .addInputs("in").addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5) @@ -391,7 +391,7 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { net.init(); ComputationGraphConfiguration conf2 = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration.builder().seed(12345L) .graphBuilder() .addInputs("in").addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java index ba3eb90bb..3ca1aa8bd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java @@ -188,7 +188,7 @@ public class TestGraphNodes extends BaseDL4JTest { @Test public void 
testLastTimeStepVertex() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addVertex("lastTS", new LastTimeStepVertex("in"), "in") .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "lastTS").setOutputs("out") .build(); @@ -239,7 +239,7 @@ public class TestGraphNodes extends BaseDL4JTest { @Test public void testDuplicateToTimeSeriesVertex() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder() .addInputs("in2d", "in3d") .addVertex("duplicateTS", new DuplicateToTimeSeriesVertex("in3d"), "in2d") .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "duplicateTS") @@ -313,7 +313,7 @@ public class TestGraphNodes extends BaseDL4JTest { null, null); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in1", "in2") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in1", "in2") .addVertex("stack", new org.deeplearning4j.nn.conf.graph.StackVertex(), "in1", "in2") .addLayer("1", new EmbeddingLayer.Builder().nIn(5).nOut(5).build(), "stack") .addVertex("unstack1", new org.deeplearning4j.nn.conf.graph.UnstackVertex(0, 2), "1") @@ -540,7 +540,7 @@ public class TestGraphNodes extends BaseDL4JTest { public void testJSON() { //The config here is non-sense, but that doesn't matter for config -> json -> config test ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addVertex("v1", new ElementWiseVertex(ElementWiseVertex.Op.Add), "in") .addVertex("v2", new org.deeplearning4j.nn.conf.graph.MergeVertex(), "in", "in") .addVertex("v3", new PreprocessorVertex( @@ -565,7 +565,7 @@ public class TestGraphNodes extends BaseDL4JTest { int numLabelClasses = 10; int numInputs = 5; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .trainingWorkspaceMode(WorkspaceMode.NONE) .inferenceWorkspaceMode(WorkspaceMode.NONE) .seed(123) //Random number generator seed for improved repeatability. Optional. 
diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java index 14e169767..629fd7069 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java @@ -24,7 +24,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ActivationLayer; @@ -83,15 +82,16 @@ public class ActivationLayerTest extends BaseDL4JTest { DataSet next = iter.next(); // Run without separate activation layer - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) - .list() - .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10).activation(Activation.RELU) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .seed(123) + .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10).activation(Activation.RELU) .weightInit(WeightInit.XAVIER).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + + .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) - .build(); + .build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); @@ -99,7 +100,7 @@ public class ActivationLayerTest extends BaseDL4JTest { // Run with separate activation layer - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10).activation(Activation.IDENTITY) @@ -152,7 +153,7 @@ public class ActivationLayerTest extends BaseDL4JTest { // Run without separate activation layer Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new AutoEncoder.Builder().nIn(nIn).nOut(layerSize).corruptionLevel(0.0) @@ -170,7 +171,7 @@ public class ActivationLayerTest extends BaseDL4JTest { // Run with separate activation layer Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new AutoEncoder.Builder().nIn(nIn).nOut(layerSize).corruptionLevel(0.0) @@ -214,7 +215,7 @@ public class ActivationLayerTest extends BaseDL4JTest { DataSet next = iter.next(); // Run without separate activation layer - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + 
NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) @@ -222,7 +223,7 @@ public class ActivationLayerTest extends BaseDL4JTest { .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).nOut(10).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); @@ -230,8 +231,8 @@ public class ActivationLayerTest extends BaseDL4JTest { // Run with separate activation layer - MultiLayerConfiguration conf2 = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .seed(123).list() .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) @@ -243,7 +244,7 @@ public class ActivationLayerTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .nOut(10).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); MultiLayerNetwork network2 = new MultiLayerNetwork(conf2); network2.init(); @@ -271,7 +272,7 @@ public class ActivationLayerTest extends BaseDL4JTest { @Test public void testActivationInheritance() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .weightInit(WeightInit.XAVIER) .activation(Activation.RATIONALTANH) @@ -287,19 +288,19 @@ public class ActivationLayerTest extends BaseDL4JTest { MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); - assertNotNull(((ActivationLayer)network.getLayer(1).conf().getLayer()).getActivationFn()); + assertNotNull(((ActivationLayer)network.getLayer(1).getLayerConfiguration()).getActivationFn()); - assertTrue(((DenseLayer)network.getLayer(0).conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer(1).conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer(2).conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer(3).conf().getLayer()).getActivationFn() instanceof ActivationELU); - assertTrue(((OutputLayer)network.getLayer(4).conf().getLayer()).getActivationFn() instanceof ActivationSoftmax); + assertTrue(((DenseLayer)network.getLayer(0).getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); + assertTrue(((ActivationLayer)network.getLayer(1).getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); + assertTrue(((ActivationLayer)network.getLayer(2).getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); + assertTrue(((ActivationLayer)network.getLayer(3).getLayerConfiguration()).getActivationFn() instanceof ActivationELU); + assertTrue(((OutputLayer)network.getLayer(4).getLayerConfiguration()).getActivationFn() instanceof ActivationSoftmax); } @Test public void testActivationInheritanceCG() { - ComputationGraphConfiguration conf = new 
NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .weightInit(WeightInit.XAVIER) .activation(Activation.RATIONALTANH) @@ -317,13 +318,13 @@ public class ActivationLayerTest extends BaseDL4JTest { ComputationGraph network = new ComputationGraph(conf); network.init(); - assertNotNull(((ActivationLayer)network.getLayer("1").conf().getLayer()).getActivationFn()); + assertNotNull(((ActivationLayer)network.getLayer("1").getLayerConfiguration()).getActivationFn()); - assertTrue(((DenseLayer)network.getLayer("0").conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer("1").conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer("2").conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer("3").conf().getLayer()).getActivationFn() instanceof ActivationELU); - assertTrue(((OutputLayer)network.getLayer("4").conf().getLayer()).getActivationFn() instanceof ActivationSoftmax); + assertTrue(((DenseLayer)network.getLayer("0").getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); + assertTrue(((ActivationLayer)network.getLayer("1").getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); + assertTrue(((ActivationLayer)network.getLayer("2").getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); + assertTrue(((ActivationLayer)network.getLayer("3").getLayerConfiguration()).getActivationFn() instanceof ActivationELU); + assertTrue(((OutputLayer)network.getLayer("4").getLayerConfiguration()).getActivationFn() instanceof ActivationSoftmax); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/AutoEncoderTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/AutoEncoderTest.java index f841d1454..8b63b88b4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/AutoEncoderTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/AutoEncoderTest.java @@ -47,7 +47,7 @@ public class AutoEncoderTest extends BaseDL4JTest { int in2Size = 15; int hiddenSize = 10; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) .graphBuilder() .addInputs("in1", "in2") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerTest.java index bc1b2db87..3162ed209 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerTest.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.layers; import lombok.val; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -81,12 +80,12 @@ public class BaseLayerTest extends BaseDL4JTest { int nIn = 2; int nOut = 2; - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + 
NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(new ConvolutionLayer.Builder().nIn(nIn).nOut(nOut).build()).build(); - val numParams = conf.getLayer().initializer().numParams(conf); + val numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - return conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + return conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); } @@ -94,7 +93,7 @@ public class BaseLayerTest extends BaseDL4JTest { int nIn = 2; int nOut = 2; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .layer(1, new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build()).build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java index 7b55a4641..002495133 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java @@ -41,8 +41,8 @@ public class CacheModeTest extends BaseDL4JTest { @Test public void testConvCacheModeSimple(){ - MultiLayerConfiguration conf1 = getConf(CacheMode.NONE); - MultiLayerConfiguration conf2 = getConf(CacheMode.DEVICE); + NeuralNetConfiguration conf1 = getConf(CacheMode.NONE); + NeuralNetConfiguration conf2 = getConf(CacheMode.DEVICE); MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); net1.init(); @@ -62,8 +62,8 @@ public class CacheModeTest extends BaseDL4JTest { assertEquals(net1.params(), net2.params()); } - private static MultiLayerConfiguration getConf(CacheMode cacheMode){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + private static NeuralNetConfiguration getConf(CacheMode cacheMode){ + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .inferenceWorkspaceMode(WorkspaceMode.ENABLED) .trainingWorkspaceMode(WorkspaceMode.ENABLED) @@ -73,7 +73,7 @@ public class CacheModeTest extends BaseDL4JTest { .layer(new ConvolutionLayer.Builder().nOut(3).build()) .layer(new ConvolutionLayer.Builder().nOut(3).build()) .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)) + .inputType(InputType.convolutionalFlat(28, 28, 1)) .build(); return conf; @@ -84,8 +84,8 @@ public class CacheModeTest extends BaseDL4JTest { for(boolean graves : new boolean[]{true, false}) { - MultiLayerConfiguration conf1 = getConfLSTM(CacheMode.NONE, graves); - MultiLayerConfiguration conf2 = getConfLSTM(CacheMode.DEVICE, graves); + NeuralNetConfiguration conf1 = getConfLSTM(CacheMode.NONE, graves); + NeuralNetConfiguration conf2 = getConfLSTM(CacheMode.DEVICE, graves); MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); net1.init(); @@ -106,8 +106,8 @@ public class CacheModeTest extends BaseDL4JTest { } } - private static MultiLayerConfiguration getConfLSTM(CacheMode cacheMode, boolean graves){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + private static NeuralNetConfiguration getConfLSTM(CacheMode cacheMode, boolean graves){ + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) 
.inferenceWorkspaceMode(WorkspaceMode.ENABLED) .trainingWorkspaceMode(WorkspaceMode.ENABLED) @@ -152,7 +152,7 @@ public class CacheModeTest extends BaseDL4JTest { } private static ComputationGraphConfiguration getConfCG(CacheMode cacheMode){ - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .inferenceWorkspaceMode(WorkspaceMode.ENABLED) .trainingWorkspaceMode(WorkspaceMode.ENABLED) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java index 73bd4c333..9f5597199 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java @@ -52,7 +52,7 @@ public class CenterLossOutputLayerTest extends BaseDL4JTest { private ComputationGraph getGraph(int numLabels, double lambda) { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .dist(new NormalDistribution(0, 1)).updater(new NoOp()) .graphBuilder().addInputs("input1") @@ -73,7 +73,7 @@ public class CenterLossOutputLayerTest extends BaseDL4JTest { int nChannels = 1; // Number of input channels int outputNum = 10; // The number of possible outcomes - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) // Training iterations as above + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) // Training iterations as above .l2(0.0005).weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) .graphBuilder().addInputs("input") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java index 3aa7e37dd..716bbb8a9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java @@ -25,7 +25,6 @@ import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -76,7 +75,7 @@ public class DropoutLayerTest extends BaseDL4JTest { @Test public void testDropoutLayerWithoutTraining() throws Exception { - MultiLayerConfiguration confIntegrated = new NeuralNetConfiguration.Builder().seed(3648) + NeuralNetConfiguration confIntegrated = NeuralNetConfiguration.builder().seed(3648) .list().layer(0, new ConvolutionLayer.Builder(1, 1).stride(1, 1).nIn(1).nOut(1).dropOut(0.25) .activation(Activation.IDENTITY).weightInit(WeightInit.XAVIER) @@ -85,7 +84,7 @@ public class DropoutLayerTest extends BaseDL4JTest { .activation(Activation.SOFTMAX) .weightInit(WeightInit.XAVIER).dropOut(0.25) .nOut(4).build()) - 
.setInputType(InputType.convolutionalFlat(2, 2, 1)).build(); + .inputType(InputType.convolutionalFlat(2, 2, 1)).build(); MultiLayerNetwork netIntegrated = new MultiLayerNetwork(confIntegrated); netIntegrated.init(); @@ -94,8 +93,8 @@ public class DropoutLayerTest extends BaseDL4JTest { netIntegrated.getLayer(1).setParam("W", Nd4j.eye(4)); netIntegrated.getLayer(1).setParam("b", Nd4j.zeros(4, 1)); - MultiLayerConfiguration confSeparate = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration confSeparate = + NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .seed(3648) .list().layer(0, @@ -109,7 +108,7 @@ public class DropoutLayerTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .nOut(4).build()) - .setInputType(InputType.convolutionalFlat(2, 2, 1)).build(); + .inputType(InputType.convolutionalFlat(2, 2, 1)).build(); MultiLayerNetwork netSeparate = new MultiLayerNetwork(confSeparate); netSeparate.init(); @@ -137,8 +136,8 @@ public class DropoutLayerTest extends BaseDL4JTest { List actTestSeparate = netSeparate.feedForward(in.dup(), false); //Check masks: - INDArray maskIntegrated = ((Dropout)netIntegrated.getLayer(0).conf().getLayer().getIDropout()).getMask(); - INDArray maskSeparate = ((Dropout)netSeparate.getLayer(0).conf().getLayer().getIDropout()).getMask(); + INDArray maskIntegrated = ((Dropout)netIntegrated.getLayer(0).getLayerConfiguration().getIDropout()).getMask(); + INDArray maskSeparate = ((Dropout)netSeparate.getLayer(0).getLayerConfiguration().getIDropout()).getMask(); assertEquals(maskIntegrated, maskSeparate); @@ -156,7 +155,7 @@ public class DropoutLayerTest extends BaseDL4JTest { DataSet next = iter.next(); // Run without separate activation layer - MultiLayerConfiguration confIntegrated = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration confIntegrated = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10) @@ -173,7 +172,7 @@ public class DropoutLayerTest extends BaseDL4JTest { netIntegrated.fit(next); // Run with separate activation layer - MultiLayerConfiguration confSeparate = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration confSeparate = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10).activation(Activation.RELU) @@ -229,7 +228,7 @@ public class DropoutLayerTest extends BaseDL4JTest { // Run without separate activation layer Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration confIntegrated = new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration confIntegrated = NeuralNetConfiguration.builder().seed(123) .list().layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) .activation(Activation.TANH).weightInit(WeightInit.XAVIER) @@ -237,7 +236,7 @@ public class DropoutLayerTest extends BaseDL4JTest { .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).dropOut(0.5) .nOut(10).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); // Run with separate activation layer Nd4j.getRandom().setSeed(12345); @@ -248,14 +247,14 @@ public class DropoutLayerTest extends BaseDL4JTest { Map preProcessorMap = new 
HashMap<>(); preProcessorMap.put(1, new CnnToFeedForwardPreProcessor(13, 13, 20)); - MultiLayerConfiguration confSeparate = new NeuralNetConfiguration.Builder().seed(123).list() + NeuralNetConfiguration confSeparate = NeuralNetConfiguration.builder().seed(123).list() .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()) .layer(1, new DropoutLayer.Builder(0.5).build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build()) .inputPreProcessors(preProcessorMap) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); Nd4j.getRandom().setSeed(12345); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java index 0f506dbfe..1e83adaf2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java @@ -23,9 +23,11 @@ package org.deeplearning4j.nn.layers; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer.Builder; +import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.transferlearning.FineTuneConfiguration; @@ -40,6 +42,7 @@ import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.List; +import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -54,19 +57,20 @@ public class FrozenLayerTest extends BaseDL4JTest { public void testFrozen() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); - MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(overallConf.clone().list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).build()) - .layer(2, new DenseLayer.Builder().nIn(2).nOut(3).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) - .build()) - .build()); + MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( + (NeuralNetConfiguration) ((NeuralNetConfigurationBuilder)overallConf).clone().list() + .layer(0, new Builder().nIn(4).nOut(3).build()) + .layer(1, new Builder().nIn(3).nOut(2).build()) + .layer(2, new Builder().nIn(2).nOut(3).build()) + .layer(3, new 
OutputLayer.Builder( + LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) + .build()) + .build()); modelToFineTune.init(); List ff = modelToFineTune.feedForwardToLayer(2, randomData.getFeatures(), false); @@ -77,12 +81,13 @@ public class FrozenLayerTest extends BaseDL4JTest { INDArray paramsLastTwoLayers = Nd4j.hstack(modelToFineTune.getLayer(2).params(), modelToFineTune.getLayer(3).params()); - MultiLayerNetwork notFrozen = new MultiLayerNetwork(overallConf.clone().list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(3).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) - .build()) - .build(), paramsLastTwoLayers); + MultiLayerNetwork notFrozen = new MultiLayerNetwork( + (NeuralNetConfiguration) overallConf.clone() + .layer(0, new Builder().nIn(2).nOut(3).build()) + .layer(1, new OutputLayer.Builder( + LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) + .build()) + .build(), paramsLastTwoLayers); // assertEquals(modelNow.getLayer(2).conf(), notFrozen.getLayer(0).conf()); //Equal, other than names // assertEquals(modelNow.getLayer(3).conf(), notFrozen.getLayer(1).conf()); //Equal, other than names @@ -109,16 +114,17 @@ public class FrozenLayerTest extends BaseDL4JTest { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); - MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(overallConf.list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).build()) - .layer(2, new DenseLayer.Builder().nIn(2).nOut(3).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) - .build()) - .build()); + MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( + (NeuralNetConfiguration) overallConf + .layer(0, new Builder().nIn(4).nOut(3).build()) + .layer(1, new Builder().nIn(3).nOut(2).build()) + .layer(2, new Builder().nIn(2).nOut(3).build()) + .layer(3, new OutputLayer.Builder( + LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) + .build()) + .build()); modelToFineTune.init(); INDArray asFrozenFeatures = modelToFineTune.feedForwardToLayer(2, randomData.getFeatures(), false).get(2); @@ -127,18 +133,18 @@ public class FrozenLayerTest extends BaseDL4JTest { MultiLayerNetwork clonedModel = modelNow.clone(); //Check json - assertEquals(modelNow.getLayerWiseConfigurations().toJson(), clonedModel.getLayerWiseConfigurations().toJson()); + assertEquals(modelNow.getConfiguration().toJson(), clonedModel.getConfiguration().toJson()); //Check params assertEquals(modelNow.params(), clonedModel.params()); MultiLayerNetwork notFrozen = new MultiLayerNetwork( - overallConf.list().layer(0, new DenseLayer.Builder().nIn(2).nOut(3).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(3).nOut(3) - .build()) - .build(), + (NeuralNetConfiguration) overallConf.layer(0, new Builder().nIn(2).nOut(3).build()) + .layer(1, new OutputLayer.Builder( + LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(3).nOut(3) + 
.build()) + .build(), Nd4j.hstack(modelToFineTune.getLayer(2).params(), modelToFineTune.getLayer(3).params())); int i = 0; @@ -161,7 +167,7 @@ public class FrozenLayerTest extends BaseDL4JTest { public void testFrozenCompGraph() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") @@ -212,7 +218,7 @@ public class FrozenLayerTest extends BaseDL4JTest { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") @@ -273,7 +279,7 @@ public class FrozenLayerTest extends BaseDL4JTest { public void testFrozenLayerInstantiation() { //We need to be able to instantiate frozen layers from JSON etc, and have them be the same as if // they were initialized via the builder - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345).list() + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder().seed(12345).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()) .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) @@ -283,7 +289,7 @@ public class FrozenLayerTest extends BaseDL4JTest { .nOut(10).build()) .build(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345).list().layer(0, + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345).list().layer(0, new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(new DenseLayer.Builder().nIn(10).nOut(10) .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build())) .layer(1, new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer( @@ -303,7 +309,7 @@ public class FrozenLayerTest extends BaseDL4JTest { String json = conf2.toJson(); - MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration fromJson = NeuralNetConfiguration.fromJson(json); assertEquals(conf2, fromJson); @@ -323,7 +329,7 @@ public class FrozenLayerTest extends BaseDL4JTest { //We need to be able to instantiate frozen layers from JSON etc, and have them be the same as if // they were initialized via the builder - ComputationGraphConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder() + ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build(), "in") @@ -335,7 +341,7 @@ public class FrozenLayerTest extends BaseDL4JTest { "1") .setOutputs("2").build(); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in") .addLayer("0", new 
org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.Builder() .layer(new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java index dce5daebd..89c359ae7 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java @@ -22,17 +22,13 @@ package org.deeplearning4j.nn.layers; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.MergeVertex; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.nn.transferlearning.FineTuneConfiguration; -import org.deeplearning4j.nn.transferlearning.TransferLearning; import org.deeplearning4j.nn.weights.WeightInit; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; @@ -42,8 +38,6 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.util.List; - import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -55,7 +49,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { public void testFrozenWithBackpropLayerInstantiation() { //We need to be able to instantiate frozen layers from JSON etc, and have them be the same as if // they were initialized via the builder - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345).list() + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder().seed(12345).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()) .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) @@ -65,7 +59,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { .nOut(10).build()) .build(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345).list().layer(0, + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345).list().layer(0, new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(10).nOut(10) .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build())) .layer(1, new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( @@ -85,7 +79,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { String json = conf2.toJson(); - MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration fromJson = NeuralNetConfiguration.fromJson(json); assertEquals(conf2, fromJson); @@ -105,7 +99,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { //We need to be able to instantiate frozen layers from JSON etc, and have them be the same as if // 
they were initialized via the builder - ComputationGraphConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder() + ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build(), "in") @@ -117,7 +111,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { "1") .setOutputs("2").build(); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) @@ -160,7 +154,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); DataSet randomData = new DataSet(Nd4j.rand(100, 4), Nd4j.rand(100, 1)); - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() .seed(12345) .weightInit(WeightInit.XAVIER) .updater(new Sgd(2)) @@ -212,7 +206,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { String unfrozenLayer1 = unfrozenBranchName + "1"; String unfrozenBranch2 = unfrozenBranchName + "Output"; - ComputationGraphConfiguration computationGraphConf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration computationGraphConf = NeuralNetConfiguration.builder() .updater(new Sgd(2.0)) .seed(12345) .graphBuilder() @@ -258,7 +252,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); DataSet randomData = new DataSet(Nd4j.rand(100, 4), Nd4j.rand(100, 1)); - MultiLayerConfiguration confSgd = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration confSgd = NeuralNetConfiguration.builder() .seed(12345) .weightInit(WeightInit.XAVIER) .updater(new Sgd(2)) @@ -269,7 +263,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { .layer(3,new OutputLayer.Builder(LossFunctions.LossFunction.MSE).updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).activation(Activation.TANH).nIn(2).nOut(1).build()) .build(); - MultiLayerConfiguration confFrozen = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration confFrozen = NeuralNetConfiguration.builder() .seed(12345) .weightInit(WeightInit.XAVIER) .updater(new Sgd(2)) @@ -326,7 +320,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { String unfrozenLayer1 = unfrozenBranchName + "1"; String unfrozenBranch2 = unfrozenBranchName + "Output"; - ComputationGraphConfiguration computationGraphConf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration computationGraphConf = NeuralNetConfiguration.builder() .updater(new Sgd(2.0)) .seed(12345) .graphBuilder() @@ -347,7 +341,7 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { .setOutputs(frozenBranchOutput) .build(); - ComputationGraphConfiguration computationSgdGraphConf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration computationSgdGraphConf = NeuralNetConfiguration.builder() .updater(new Sgd(2.0)) .seed(12345) .graphBuilder() diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java index 
232a9a46e..0bdf441ac 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java @@ -34,7 +34,6 @@ import org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.optimize.listeners.ScoreIterationListener; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; @@ -57,7 +56,7 @@ public class OutputLayerTest extends BaseDL4JTest { @Test public void testSetParams() { - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) .updater(new Sgd(1e-1)) .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(4).nOut(3) @@ -65,12 +64,12 @@ public class OutputLayerTest extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - OutputLayer l = (OutputLayer) conf.getLayer().instantiate(conf, + OutputLayer l = (OutputLayer) conf.getFirstLayer().instantiate(conf, Collections.singletonList(new ScoreIterationListener(1)), 0, params, true, params.dataType()); params = l.params(); - l.setParams(params); + l.setParamsTable(params); assertEquals(params, l.params()); } @@ -94,7 +93,7 @@ public class OutputLayerTest extends BaseDL4JTest { } } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) .dist(new NormalDistribution(0, 1)).activation(Activation.TANH) .updater(new NoOp()).build()) @@ -118,7 +117,7 @@ public class OutputLayerTest extends BaseDL4JTest { //As above, but for RnnOutputLayer. Expect all activations etc. 
to be 3d - MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list() + NeuralNetConfiguration confRnn = NeuralNetConfiguration.builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) .dist(new NormalDistribution(0, 1)).activation(Activation.TANH) .updater(new NoOp()).build()) @@ -175,7 +174,7 @@ public class OutputLayerTest extends BaseDL4JTest { } INDArray labels2d = proc.backprop(labels3d, miniBatchSize, LayerWorkspaceMgr.noWorkspaces()); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) .dist(new NormalDistribution(0, 1)) .activation(Activation.TANH).updater(new NoOp()).build()) @@ -192,7 +191,7 @@ public class OutputLayerTest extends BaseDL4JTest { INDArray out2d = mln.feedForward(input).get(2); INDArray out3d = proc.preProcess(out2d, miniBatchSize, LayerWorkspaceMgr.noWorkspaces()); - MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list() + NeuralNetConfiguration confRnn = NeuralNetConfiguration.builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) .dist(new NormalDistribution(0, 1)) .activation(Activation.TANH).updater(new NoOp()).build()) @@ -271,8 +270,8 @@ public class OutputLayerTest extends BaseDL4JTest { int nOut = 6; int miniBatchSize = 3; - MultiLayerConfiguration conf1 = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf1 = + NeuralNetConfiguration.builder().seed(12345L) .updater(new NoOp()) .list() .layer(new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) @@ -288,8 +287,8 @@ public class OutputLayerTest extends BaseDL4JTest { mln.init(); - MultiLayerConfiguration conf2 = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder().seed(12345L) .updater(new NoOp()) .list() .layer(new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) @@ -348,8 +347,8 @@ public class OutputLayerTest extends BaseDL4JTest { //Check that (A+identity) is equal to (identity+A), for activation A //i.e., should get same output and weight gradients for both - MultiLayerConfiguration conf1 = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf1 = + NeuralNetConfiguration.builder().seed(12345L) .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) .inferenceWorkspaceMode(ws) @@ -364,8 +363,8 @@ public class OutputLayerTest extends BaseDL4JTest { .build()) .build(); - MultiLayerConfiguration conf2 = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder().seed(12345L) .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) .inferenceWorkspaceMode(ws) @@ -438,7 +437,7 @@ public class OutputLayerTest extends BaseDL4JTest { //i.e., should get same output and weight gradients for both ComputationGraphConfiguration conf1 = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration.builder().seed(12345L) .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) .inferenceWorkspaceMode(ws) @@ -456,7 +455,7 @@ public class OutputLayerTest extends BaseDL4JTest { .build(); ComputationGraphConfiguration conf2 = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration.builder().seed(12345L) .updater(new NoOp()) 
.convolutionMode(ConvolutionMode.Same) .inferenceWorkspaceMode(ws) @@ -524,8 +523,8 @@ public class OutputLayerTest extends BaseDL4JTest { public void testCnnOutputLayerSoftmax(){ //Check that softmax is applied channels-wise - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345L) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(12345L) .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) .list() @@ -555,19 +554,19 @@ public class OutputLayerTest extends BaseDL4JTest { @Test public void testOutputLayerDefaults(){ - new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration.builder().list() .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(10).nOut(10).build()) .build(); - new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration.builder().list() .layer(new org.deeplearning4j.nn.conf.layers.LossLayer.Builder().build()) .build(); - new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration.builder().list() .layer(new org.deeplearning4j.nn.conf.layers.CnnLossLayer.Builder().build()) .build(); - new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration.builder().list() .layer(new org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer.Builder().build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/RepeatVectorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/RepeatVectorTest.java index 3e526e774..483e34572 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/RepeatVectorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/RepeatVectorTest.java @@ -32,8 +32,6 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.common.primitives.Pair; -import java.util.Arrays; - import static org.junit.jupiter.api.Assertions.*; public class RepeatVectorTest extends BaseDL4JTest { @@ -42,10 +40,10 @@ public class RepeatVectorTest extends BaseDL4JTest { private Layer getRepeatVectorLayer() { - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .dataType(DataType.DOUBLE) .layer(new RepeatVector.Builder(REPEAT).build()).build(); - return conf.getLayer().instantiate(conf, null, 0, + return conf.getFirstLayer().instantiate(conf, null, 0, null, false, DataType.DOUBLE); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java index 4d46d5066..db7d4525c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java @@ -50,11 +50,11 @@ public class SeedTest extends BaseDL4JTest { .activation(Activation.SIGMOID).build(); NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().layer(layerType).seed(123).build(); + NeuralNetConfiguration.builder().layer(layerType).seed(123).build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + Layer layer = conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); 
layer.setBackpropGradientsViewArray(Nd4j.create(1, numParams)); layer.fit(data.getFeatures(), LayerWorkspaceMgr.noWorkspaces()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java index 868f34ba7..e17653219 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java @@ -20,26 +20,20 @@ package org.deeplearning4j.nn.layers; -import lombok.val; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.distribution.UniformDistribution; -import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.api.iter.NdIndexIterator; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.lang.reflect.Field; import java.util.List; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -55,7 +49,7 @@ public class TestDropout extends BaseDL4JTest { int nIn = 8; int nOut = 8; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd()) .dropOut(0.5).list() .layer(0, new OutputLayer.Builder().activation(Activation.IDENTITY) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java index 18c285baf..6b307a68c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java @@ -25,7 +25,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ActivationLayer; @@ -55,7 +54,7 @@ public class CapsNetMNISTTest extends BaseDL4JTest { @Test public void testCapsNetOnMNIST(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(123) .updater(new Adam()) .list() @@ -72,7 +71,7 @@ public class CapsNetMNISTTest extends BaseDL4JTest { .layer(new CapsuleStrengthLayer.Builder().build()) .layer(new ActivationLayer.Builder(new ActivationSoftmax()).build()) .layer(new LossLayer.Builder(new LossNegativeLogLikelihood()).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)) + .inputType(InputType.convolutionalFlat(28, 28, 1)) .build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleLayerTest.java index 70e503c42..4536b915b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleLayerTest.java @@ -26,7 +26,6 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.CapsuleLayer; @@ -81,11 +80,11 @@ public class CapsuleLayerTest extends BaseDL4JTest { @Test public void testLayer(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(123) .list() .layer(new CapsuleLayer.Builder(10, 16, 3).build()) - .setInputType(InputType.recurrent(10, 8)) + .inputType(InputType.recurrent(10, 8)) .build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleStrengthLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleStrengthLayerTest.java index fac472d68..388d380dc 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleStrengthLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleStrengthLayerTest.java @@ -24,7 +24,6 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.CapsuleStrengthLayer; @@ -52,11 +51,11 @@ public class CapsuleStrengthLayerTest extends BaseDL4JTest { @Test public void testLayer(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(123) .list() .layer(new CapsuleStrengthLayer.Builder().build()) - .setInputType(InputType.recurrent(5, 8)) + .inputType(InputType.recurrent(5, 8)) .build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java index 5840ec85f..12f63e7ec 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java @@ -26,7 +26,6 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.PrimaryCapsules; @@ -106,7 
+105,7 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { @Test public void testLayer(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(123) .list() .layer(new PrimaryCapsules.Builder(8, 10) @@ -114,7 +113,7 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { .stride(4, 4) .useLeakyReLU(0.5) .build()) - .setInputType(InputType.convolutional(20, 20, 20)) + .inputType(InputType.convolutional(20, 20, 20)) .build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java index 7c07bfeb2..44ee236c8 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java @@ -758,8 +758,8 @@ public class ConvDataFormatTests extends BaseDL4JTest { } } - private MultiLayerNetwork getNetWithLayer(Layer layer, CNN2DFormat format, ConvolutionMode cm, InputType inputType) { - NeuralNetConfiguration.ListBuilder builder = new NeuralNetConfiguration.Builder() + private MultiLayerNetwork getNetWithLayer(LayerConfiguration layer, CNN2DFormat format, ConvolutionMode cm, InputType inputType) { + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = (NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder() .dataType(this.dataType) .seed(12345) .convolutionMode(cm) @@ -774,7 +774,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { .layer(layer) .layer(new OutputLayer.Builder().nOut(10) .activation(Activation.SOFTMAX).build()) - .setInputType(inputType != null ? inputType : InputType.convolutional(12, 12, 3, format)); + .inputType(inputType != null ? 
inputType : InputType.convolutional(12, 12, 3, format)); if(format == CNN2DFormat.NHWC && !(layer instanceof GlobalPoolingLayer)){ //Add a preprocessor due to the differences in how NHWC and NCHW activations are flattened @@ -799,7 +799,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { } private MultiLayerNetwork getCnnLossNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm){ - NeuralNetConfiguration.ListBuilder builder = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = (NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder() .seed(12345) .convolutionMode(cm) .list() @@ -819,7 +819,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { .activation(Activation.SOFTMAX).build()); } - builder.setInputType(InputType.convolutional(12, 12, 3, format)); + builder.inputType(InputType.convolutional(12, 12, 3, format)); MultiLayerNetwork net = new MultiLayerNetwork(builder.build()); net.init(); @@ -984,24 +984,24 @@ public class ConvDataFormatTests extends BaseDL4JTest { for(CNN2DFormat df : CNN2DFormat.values()) { for(int i = 0; i < 4; i++) { - NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder() - .list(); + NeuralNetConfiguration.NeuralNetConfigurationBuilder b = NeuralNetConfiguration.builder(); + switch (i){ case 0: b.layer(new ConvolutionLayer.Builder().kernelSize(2,2).nIn(3).nOut(3).dataFormat(df).build()); - b.setInputType(InputType.convolutional(12,12,3,df)); + b.inputType(InputType.convolutional(12,12,3,df)); break; case 1: b.layer(new DepthwiseConvolution2D.Builder().kernelSize(2,2).nIn(3).nOut(3).dataFormat(df).build()); - b.setInputType(InputType.convolutional(12,12,3,df)); + b.inputType(InputType.convolutional(12,12,3,df)); break; case 2: b.layer(new Deconvolution2D.Builder().dataFormat(df).kernelSize(2,2).nIn(3).nOut(3).build()); - b.setInputType(InputType.convolutional(12,12,3,df)); + b.inputType(InputType.convolutional(12,12,3,df)); break; case 3: b.layer(new SeparableConvolution2D.Builder().dataFormat(df).kernelSize(2,2).nIn(3).nOut(3).build()); - b.setInputType(InputType.convolutional(12,12,3,df)); + b.inputType(InputType.convolutional(12,12,3,df)); break; } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Convolution3DTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Convolution3DTest.java index d282690bb..d4a685a3a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Convolution3DTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Convolution3DTest.java @@ -34,8 +34,6 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; -import java.util.Arrays; - import static org.junit.jupiter.api.Assertions.*; /** @@ -86,15 +84,15 @@ public class Convolution3DTest extends BaseDL4JTest { } private Layer getConvolution3DLayer(ConvolutionMode mode) { - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) .layer(new Convolution3D.Builder().kernelSize(kernelSize).nIn(nChannelsIn).nOut(nChannelsOut) .dataFormat(Convolution3D.DataFormat.NCDHW).convolutionMode(mode).hasBias(false) .build()) .build(); - long numParams = 
conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.ones(1, numParams); - return conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + return conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); } public INDArray getData() throws Exception { diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java index 246dfee5b..1af476e5e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java @@ -27,8 +27,8 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -71,10 +71,10 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { @Test public void testConvolutionLayerSetup() { - MultiLayerConfiguration.Builder builder = inComplete(); - builder.setInputType(InputType.convolutionalFlat(28, 28, 1)); - MultiLayerConfiguration completed = complete().build(); - MultiLayerConfiguration test = builder.build(); + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = inComplete(); + builder.inputType(InputType.convolutionalFlat(28, 28, 1)); + NeuralNetConfiguration completed = complete().build(); + NeuralNetConfiguration test = builder.build(); assertEquals(completed, test); } @@ -90,7 +90,7 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { int seed = 123; //setup the network - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .l1(1e-1).l2(2e-4).dropOut(0.5).miniBatch(true) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() .layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) @@ -106,7 +106,7 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .build()) - .setInputType(InputType.convolutional(numRows, numColumns, nChannels)); + .inputType(InputType.convolutional(numRows, numColumns, nChannels)); DataSet d = new DataSet(Nd4j.rand(10, nChannels, numRows, numColumns), FeatureUtil.toOutcomeMatrix(new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 6)); @@ -119,10 +119,10 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { @Test public void testMnistLenet() throws Exception { - MultiLayerConfiguration.Builder incomplete = incompleteMnistLenet(); - incomplete.setInputType(InputType.convolutionalFlat(28, 28, 1)); + NeuralNetConfiguration.NeuralNetConfigurationBuilder incomplete = incompleteMnistLenet(); + 
incomplete.inputType(InputType.convolutionalFlat(28, 28, 1)); - MultiLayerConfiguration testConf = incomplete.build(); + NeuralNetConfiguration testConf = incomplete.build(); assertEquals(800, ((FeedForwardLayer) testConf.getConf(4).getLayer()).getNIn()); assertEquals(500, ((FeedForwardLayer) testConf.getConf(5).getLayer()).getNIn()); @@ -141,9 +141,9 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { INDArray labels = Nd4j.rand(10, 2); DataSet next = new DataSet(in, labels); - NeuralNetConfiguration.ListBuilder builder = (NeuralNetConfiguration.ListBuilder) incompleteLFW(); - builder.setInputType(InputType.convolutional(28, 28, 3)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = incompleteLFW(); + builder.inputType(InputType.convolutional(28, 28, 3)); + NeuralNetConfiguration conf = builder.build(); ConvolutionLayer layer2 = (ConvolutionLayer) conf.getConf(2).getLayer(); assertEquals(6, layer2.getNIn()); @@ -163,10 +163,10 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { reader.initialize(new FileSplit(new File(rootDir))); DataSetIterator recordReader = new RecordReaderDataSetIterator(reader, 10, 1, labels.size()); labels.remove("lfwtest"); - NeuralNetConfiguration.ListBuilder builder = (NeuralNetConfiguration.ListBuilder) incompleteLRN(); - builder.setInputType(InputType.convolutional(28, 28, 3)); + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = incompleteLRN(); + builder.inputType(InputType.convolutional(28, 28, 3)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); ConvolutionLayer layer2 = (ConvolutionLayer) conf.getConf(3).getLayer(); assertEquals(6, layer2.getNIn()); @@ -174,70 +174,70 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { } - public MultiLayerConfiguration.Builder incompleteLRN() { - MultiLayerConfiguration.Builder builder = - new NeuralNetConfiguration.Builder().seed(3) - .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( - new int[] {5, 5}).nOut(6).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder( - new int[] {2, 2}).build()) - .layer(2, new LocalResponseNormalization.Builder().build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( - new int[] {5, 5}).nOut(6).build()) - .layer(4, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder( - new int[] {2, 2}).build()) - .layer(5, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(2) - .activation(Activation.SOFTMAX).build()); + public NeuralNetConfiguration.NeuralNetConfigurationBuilder incompleteLRN() { + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder().seed(3) + .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() + .layer(0, new ConvolutionLayer.Builder( + new int[] {5, 5}).nOut(6).build()) + .layer(1, new SubsamplingLayer.Builder( + new int[] {2, 2}).build()) + .layer(2, new LocalResponseNormalization.Builder().build()) + .layer(3, new ConvolutionLayer.Builder( + new int[] {5, 5}).nOut(6).build()) + .layer(4, new SubsamplingLayer.Builder( + new int[] {2, 2}).build()) + .layer(5, new OutputLayer.Builder( + LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(2) + .activation(Activation.SOFTMAX).build()); return builder; } - public 
MultiLayerConfiguration.Builder incompleteLFW() { - MultiLayerConfiguration.Builder builder = - new NeuralNetConfiguration.Builder().seed(3) - .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( - new int[] {5, 5}).nOut(6).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder( - new int[] {2, 2}).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( - new int[] {5, 5}).nOut(6).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder( - new int[] {2, 2}).build()) - .layer(4, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).activation(Activation.SOFTMAX) - .nOut(2).build()); + public NeuralNetConfiguration.NeuralNetConfigurationBuilder incompleteLFW() { + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder().seed(3) + .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() + .layer(0, new ConvolutionLayer.Builder( + new int[] {5, 5}).nOut(6).build()) + .layer(1, new SubsamplingLayer.Builder( + new int[] {2, 2}).build()) + .layer(2, new ConvolutionLayer.Builder( + new int[] {5, 5}).nOut(6).build()) + .layer(3, new SubsamplingLayer.Builder( + new int[] {2, 2}).build()) + .layer(4, new OutputLayer.Builder( + LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).activation(Activation.SOFTMAX) + .nOut(2).build()); return builder; } - public MultiLayerConfiguration.Builder incompleteMnistLenet() { - MultiLayerConfiguration.Builder builder = - new NeuralNetConfiguration.Builder().seed(3) - .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( - new int[] {5, 5}).nIn(1).nOut(20).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder( - new int[] {2, 2}, new int[] {2, 2}).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( - new int[] {5, 5}).nIn(20).nOut(50).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder( - new int[] {2, 2}, new int[] {2, 2}).build()) - .layer(4, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nOut(500) - .build()) - .layer(5, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .activation(Activation.SOFTMAX).nOut(10) - .build()); + public NeuralNetConfiguration.NeuralNetConfigurationBuilder incompleteMnistLenet() { + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder().seed(3) + .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() + .layer(0, new ConvolutionLayer.Builder( + new int[] {5, 5}).nIn(1).nOut(20).build()) + .layer(1, new SubsamplingLayer.Builder( + new int[] {2, 2}, new int[] {2, 2}).build()) + .layer(2, new ConvolutionLayer.Builder( + new int[] {5, 5}).nIn(20).nOut(50).build()) + .layer(3, new SubsamplingLayer.Builder( + new int[] {2, 2}, new int[] {2, 2}).build()) + .layer(4, new DenseLayer.Builder().nOut(500) + .build()) + .layer(5, new OutputLayer.Builder( + LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .activation(Activation.SOFTMAX).nOut(10) + .build()); return builder; } - public MultiLayerConfiguration mnistLenet() { - MultiLayerConfiguration builder = - new NeuralNetConfiguration.Builder().seed(3) + public NeuralNetConfiguration mnistLenet() { + 
NeuralNetConfiguration builder = + NeuralNetConfiguration.builder().seed(3) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( new int[] {5, 5}).nIn(1).nOut(6).build()) @@ -254,12 +254,12 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { return builder; } - public MultiLayerConfiguration.Builder inComplete() { + public NeuralNetConfiguration.NeuralNetConfigurationBuilder inComplete() { int nChannels = 1; int outputNum = 10; int seed = 123; - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(new int[] {10, 10}, new int[] {2, 2}).nIn(nChannels).nOut(6).build()) @@ -274,14 +274,14 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { } - public MultiLayerConfiguration.Builder complete() { + public NeuralNetConfiguration.NeuralNetConfigurationBuilder complete() { final int numRows = 28; final int numColumns = 28; int nChannels = 1; int outputNum = 10; int seed = 123; - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(new int[] {10, 10}, new int[] {2, 2}).nIn(nChannels).nOut(6).build()) @@ -301,15 +301,15 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { @Test public void testDeconvolution() { - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() //out = stride * (in-1) + filter - 2*pad -> 2 * (28-1) + 2 - 0 = 56 -> 56x56x3 .layer(0, new Deconvolution2D.Builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(56-2+2*1)/2+1 = 29 -> 29x29x3 .layer(1, new SubsamplingLayer.Builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build()) .layer(2, new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(28, 28, 1)); + .inputType(InputType.convolutional(28, 28, 1)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); assertNotNull(conf.getInputPreProcess(2)); assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); @@ -324,13 +324,13 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { @Test public void testSubSamplingWithPadding() { - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() .layer(0, new ConvolutionLayer.Builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 .layer(1, new SubsamplingLayer.Builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build()) //(14-2+2)/2+1 = 8 -> 8x8x3 .layer(2, new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(28, 28, 1)); + .inputType(InputType.convolutional(28, 28, 1)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration 
conf = builder.build(); assertNotNull(conf.getInputPreProcess(2)); assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); @@ -345,13 +345,13 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { @Test public void testUpsampling() { - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() .layer(new ConvolutionLayer.Builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 .layer(new Upsampling2D.Builder().size(3).build()) // 14 * 3 = 42! .layer(new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(28, 28, 1)); + .inputType(InputType.convolutional(28, 28, 1)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); assertNotNull(conf.getInputPreProcess(2)); assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); @@ -368,13 +368,13 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { int[] blocks = new int[] {2, 2}; - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() .layer(new ConvolutionLayer.Builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 .layer(new SpaceToBatchLayer.Builder(blocks).build()) // Divide space dimensions by blocks, i.e. 14/2 = 7 .layer(new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(28, 28, 1)); + .inputType(InputType.convolutional(28, 28, 1)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); assertNotNull(conf.getInputPreProcess(2)); assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); @@ -389,15 +389,15 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { int blocks = 2; - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() //(28-2+0)/2+1 = 14 -> 14x14x3 out .layer(new ConvolutionLayer.Builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) // Divide space dimensions by blocks, i.e. 14/2 = 7 -> 7x7x12 out (3x2x2 depth) .layer(new SpaceToDepthLayer.Builder(blocks, SpaceToDepthLayer.DataFormat.NCHW).build()) .layer(new OutputLayer.Builder().nIn(3 * 2 * 2).nOut(3).activation(Activation.SOFTMAX).build()) // nIn of the next layer gets multiplied by 2*2. 
- .setInputType(InputType.convolutional(28, 28, 1)); + .inputType(InputType.convolutional(28, 28, 1)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); assertNotNull(conf.getInputPreProcess(2)); assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); @@ -415,7 +415,7 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { DataSet next = iter.next(); // Run with separate activation layer - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .weightInit(WeightInit.XAVIER).list() .layer(0, new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(1).nOut(6) @@ -428,7 +428,7 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { .layer(5, new ActivationLayer.Builder().activation(Activation.RELU).build()) .layer(6, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(10).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); @@ -447,16 +447,16 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { @Test public void testSeparableConv2D() { - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() .layer( new SeparableConvolution2D.Builder(2, 2) .depthMultiplier(2) .padding(0, 0) .stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 .layer( new SubsamplingLayer.Builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build()) //(14-2+2)/2+1 = 8 -> 8x8x3 .layer(2, new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(28, 28, 1)); + .inputType(InputType.convolutional(28, 28, 1)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); assertNotNull(conf.getInputPreProcess(2)); assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); @@ -471,7 +471,7 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { @Test public void testDeconv2D() { - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() //out = stride * (in-1) + filter - 2*pad -> 2 * (28-1) + 2 - 0 = 56 -> 56x56x3 .layer( new Deconvolution2D.Builder(2, 2) .padding(0, 0) @@ -479,9 +479,9 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { //(56-2+2*1)/2+1 = 29 -> 29x29x3 .layer( new SubsamplingLayer.Builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build()) .layer(2, new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(28, 28, 1)); + .inputType(InputType.convolutional(28, 28, 1)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); assertNotNull(conf.getInputPreProcess(2)); assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java index 6b68d6cea..0c58b8703 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java @@ -29,20 +29,18 @@ import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.*; -import org.deeplearning4j.nn.modelimport.keras.KerasModelImport; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.weights.WeightInitNormal; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.junit.jupiter.api.Test; -import org.nd4j.enums.RnnDataFormat; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.impl.ActivationSoftmax; import org.nd4j.linalg.api.buffer.DataType; @@ -59,8 +57,6 @@ import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; -import java.io.File; -import java.util.Arrays; import java.util.List; import static org.junit.jupiter.api.Assertions.*; @@ -77,7 +73,7 @@ public class ConvolutionLayerTest extends BaseDL4JTest { @Test public void testTwdFirstLayer() throws Exception { - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4) .updater(new Nesterovs(0.9)).dropOut(0.5) .list().layer(0, @@ -94,10 +90,10 @@ public class ConvolutionLayerTest extends BaseDL4JTest { .dropOut(0.5).build()) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer .nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)); + .inputType(InputType.convolutionalFlat(28, 28, 1)); DataSetIterator iter = new MnistDataSetIterator(10, 10); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); DataSet ds = iter.next(); @@ -118,21 +114,21 @@ public class ConvolutionLayerTest extends BaseDL4JTest { int kernelWidth = 3; DataSet trainInput; - MultiLayerConfiguration.Builder builder = - new NeuralNetConfiguration.Builder() - .seed(123) - .list() - .layer(0, new ConvolutionLayer.Builder(kernelHeight, kernelWidth).stride(1, 1) - .nOut(2).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX) - .kernelSize(imageHeight - kernelHeight, 1).stride(1, 1).build()) - .layer(2, new 
OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .seed(123) + .list() + .layer(0, new ConvolutionLayer.Builder(kernelHeight, kernelWidth).stride(1, 1) + .nOut(2).activation(Activation.RELU) + .weightInit(WeightInit.XAVIER).build()) + .layer(1, new SubsamplingLayer.Builder() + .poolingType(SubsamplingLayer.PoolingType.MAX) + .kernelSize(imageHeight - kernelHeight, 1).stride(1, 1).build()) + .layer(2, new OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX).build()) + .inputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); @@ -155,9 +151,9 @@ public class ConvolutionLayerTest extends BaseDL4JTest { long batchSize = 1; INDArray arr = Nd4j.randn(batchSize,vectorLength,timeSteps); - MultiLayerConfiguration build = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration build = NeuralNetConfiguration.builder().seed(seed) .activation(Activation.RELU) - .weightInit(new WeightInitNormal()) // better init + .weightInit(WeightInit.NORMAL) // better init .updater(new Adam(learningRate)) .list() // block 1 @@ -172,7 +168,7 @@ public class ConvolutionLayerTest extends BaseDL4JTest { .layer(new RnnLossLayer.Builder().dataFormat(RNNFormat.NCW) .activation(new ActivationSoftmax()) .lossFunction(new LossMCXENT()).build()) - .setInputType(InputType.recurrent(vectorLength,timeSteps,RNNFormat.NCW)) + .inputType(InputType.recurrent(vectorLength,timeSteps,RNNFormat.NCW)) .build(); MultiLayerNetwork network = new MultiLayerNetwork(build); @@ -196,18 +192,18 @@ public class ConvolutionLayerTest extends BaseDL4JTest { int kernelWidth = imageWidth + 1; DataSet trainInput; - MultiLayerConfiguration.Builder builder = - new NeuralNetConfiguration.Builder() - .seed(123) - .list() - .layer(0, new ConvolutionLayer.Builder(kernelHeight, kernelWidth) //(img-kernel+2*padding)/stride + 1: must be >= 1. Therefore: with p=0, kernel <= img size - .stride(1, 1).nOut(2).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, new OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .seed(123) + .list() + .layer(0, new ConvolutionLayer.Builder(kernelHeight, kernelWidth) //(img-kernel+2*padding)/stride + 1: must be >= 1. 
Therefore: with p=0, kernel <= img size + .stride(1, 1).nOut(2).activation(Activation.RELU) + .weightInit(WeightInit.XAVIER).build()) + .layer(1, new OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX).build()) + .inputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); @@ -232,19 +228,19 @@ public class ConvolutionLayerTest extends BaseDL4JTest { int kernelWidth = imageWidth; DataSet trainInput; - MultiLayerConfiguration.Builder builder = - new NeuralNetConfiguration.Builder() - .seed(123) - .list() - .layer(0, new ConvolutionLayer.Builder(kernelHeight, kernelWidth).stride(1, 0) - .nOut(2).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, new OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .seed(123) + .list() + .layer(0, new ConvolutionLayer.Builder(kernelHeight, kernelWidth).stride(1, 0) + .nOut(2).activation(Activation.RELU) + .weightInit(WeightInit.XAVIER).build()) + .layer(1, new OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(imageHeight, imageWidth, nChannels)); + .inputType(InputType.convolutional(imageHeight, imageWidth, nChannels)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); @@ -260,11 +256,11 @@ public class ConvolutionLayerTest extends BaseDL4JTest { public void testCNNBiasInit() { ConvolutionLayer cnn = new ConvolutionLayer.Builder().nIn(1).nOut(3).biasInit(1).build(); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(cnn).build(); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(cnn).build(); - val numParams = conf.getLayer().initializer().numParams(conf); + val numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + Layer layer = conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); assertEquals(1, layer.getParam("b").size(0)); } @@ -321,11 +317,11 @@ public class ConvolutionLayerTest extends BaseDL4JTest { ConvolutionLayer layer = new ConvolutionLayer.Builder(kernelSize, stride, padding).nIn(nIn).nOut(nOut) .activation(Activation.SIGMOID).build(); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(layer).build(); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(layer).build(); - val numParams = conf.getLayer().initializer().numParams(conf); + val numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - return conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + return conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); } public Layer getMNISTConfig() { @@ -695,17 +691,17 @@ public class ConvolutionLayerTest extends BaseDL4JTest { int outputNum = 10; int seed = 123; - MultiLayerConfiguration.Builder conf = - new NeuralNetConfiguration.Builder().seed(seed) - 
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() - .layer(0, new ConvolutionLayer.Builder(new int[] {10, 10}).nOut(6).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, - new int[] {2, 2}).stride(1, 1).build()) - .layer(2, new OutputLayer.Builder( - LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nOut(outputNum).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)); + NeuralNetConfiguration.NeuralNetConfigurationBuilder conf = + NeuralNetConfiguration.builder().seed(seed) + .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() + .layer(0, new ConvolutionLayer.Builder(new int[] {10, 10}).nOut(6).build()) + .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, + new int[] {2, 2}).stride(1, 1).build()) + .layer(2, new OutputLayer.Builder( + LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .nOut(outputNum).weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX).build()) + .inputType(InputType.convolutionalFlat(28, 28, 1)); MultiLayerNetwork model = new MultiLayerNetwork(conf.build()); model.init(); @@ -718,14 +714,14 @@ public class ConvolutionLayerTest extends BaseDL4JTest { @Test public void test1dInputType(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .convolutionMode(ConvolutionMode.Same) .list() .layer(new Convolution1DLayer.Builder().nOut(3).kernelSize(2).activation(Activation.TANH).build()) .layer(new Subsampling1DLayer.Builder().kernelSize(2).stride(2).build()) .layer(new Upsampling1D.Builder().size(2).build()) .layer(new RnnOutputLayer.Builder().nOut(7).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.recurrent(10)) + .inputType(InputType.recurrent(10)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -751,7 +747,7 @@ public class ConvolutionLayerTest extends BaseDL4JTest { @Test public void testDeconvBadInput(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new Deconvolution2D.Builder().nIn(5).nOut(3).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java index e4921b555..c39a785c1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java @@ -25,8 +25,8 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -64,7 +64,7 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest { @Test public void test2dForward(){ - MultiLayerConfiguration.Builder builder = new 
NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4) .updater(new Nesterovs(0.9)).dropOut(0.5) .list() @@ -77,9 +77,9 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest { .build()) .layer(new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer .nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 3)); + .inputType(InputType.convolutionalFlat(28, 28, 3)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); @@ -91,7 +91,7 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest { @Test public void test1dForward(){ - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4) .updater(new Nesterovs(0.9)).dropOut(0.5) .list() @@ -104,9 +104,9 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest { .build()) .layer(new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer .nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.recurrent(3, 8)); + .inputType(InputType.recurrent(3, 8)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); @@ -132,7 +132,7 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest { for (int test = 0; test < 2; test++) { String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", test=" + test; - ComputationGraphConfiguration.GraphBuilder b = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration.GraphBuilder b = NeuralNetConfiguration.builder() .dataType(networkDtype) .seed(123) .updater(new NoOp()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java index 0ee4e322f..ed8e8c99d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java @@ -31,8 +31,6 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import java.util.Arrays; - import static org.junit.jupiter.api.Assertions.*; public class SpaceToDepthTest extends BaseDL4JTest { @@ -61,10 +59,10 @@ public class SpaceToDepthTest extends BaseDL4JTest { } private Layer getSpaceToDepthLayer() { - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) .layer(new SpaceToDepthLayer.Builder(blockSize, dataFormat).build()).build(); - return conf.getLayer().instantiate(conf, null, 0, null, true, Nd4j.defaultFloatingPointType()); + return conf.getFirstLayer().instantiate(conf, null, 0, null, true, 
Nd4j.defaultFloatingPointType()); } @Test diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java index 75434a4c3..9fda734eb 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java @@ -24,8 +24,8 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; @@ -44,8 +44,6 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; -import java.util.Arrays; - import static org.junit.jupiter.api.Assertions.*; /** @@ -170,11 +168,11 @@ public class SubsamplingLayerTest extends BaseDL4JTest { ////////////////////////////////////////////////////////////////////////////////// private Layer getSubsamplingLayer(SubsamplingLayer.PoolingType pooling) { - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) .layer(new SubsamplingLayer.Builder(pooling, new int[] {2, 2}).build()).build(); - return conf.getLayer().instantiate(conf, null, 0, null, true, Nd4j.defaultFloatingPointType()); + return conf.getFirstLayer().instantiate(conf, null, 0, null, true, Nd4j.defaultFloatingPointType()); } public INDArray getData() throws Exception { @@ -214,23 +212,23 @@ public class SubsamplingLayerTest extends BaseDL4JTest { int kernelWidth = 3; DataSet trainInput; - MultiLayerConfiguration.Builder builder = - new NeuralNetConfiguration.Builder().seed(123).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( - kernelHeight, kernelWidth).stride(1, 1).nOut(2) - .activation(Activation.RELU).weightInit( - WeightInit.XAVIER) - .build()) - .layer(1, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX) - .kernelSize(imageHeight - kernelHeight + 2, 1) //imageHeight-kernelHeight+1 is ok: full height - .stride(1, 1).build()) - .layer(2, new OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder().seed(123).list() + .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( + kernelHeight, kernelWidth).stride(1, 1).nOut(2) + .activation(Activation.RELU).weightInit( + WeightInit.XAVIER) + .build()) + .layer(1, new SubsamplingLayer.Builder() + .poolingType(SubsamplingLayer.PoolingType.MAX) + .kernelSize(imageHeight - kernelHeight + 2, 1) //imageHeight-kernelHeight+1 is ok: full height + .stride(1, 1).build()) + .layer(2, new OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) + 
.activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(imageHeight, imageWidth, nChannels)); + .inputType(InputType.convolutional(imageHeight, imageWidth, nChannels)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java index 35ba6d924..61f937cec 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java @@ -25,7 +25,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.exception.DL4JException; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.LayerVertex; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -79,7 +78,7 @@ public class TestConvolutionModes extends BaseDL4JTest { inputData.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 9), NDArrayIndex.interval(0, 9)).assign(origData); - Layer layer; + LayerConfiguration layer; if (isSubsampling) { layer = new SubsamplingLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) .build(); @@ -90,15 +89,15 @@ public class TestConvolutionModes extends BaseDL4JTest { MultiLayerNetwork net = null; try { - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(cm).list() .layer(0, layer).layer(1, new OutputLayer.Builder() .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT) .nOut(3).build()) - .setInputType(InputType.convolutional(inSize, inSize, + .inputType(InputType.convolutional(inSize, inSize, inDepth)) .build(); @@ -158,7 +157,7 @@ public class TestConvolutionModes extends BaseDL4JTest { inputData.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 9), NDArrayIndex.interval(0, 9)).assign(origData); - Layer layer; + LayerConfiguration layer; if (isSubsampling) { layer = new SubsamplingLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) .build(); @@ -169,7 +168,7 @@ public class TestConvolutionModes extends BaseDL4JTest { ComputationGraph net = null; try { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER).convolutionMode(cm).graphBuilder() .addInputs("in").addLayer("0", layer, "in") .addLayer("1", new OutputLayer.Builder() @@ -210,7 +209,7 @@ public class TestConvolutionModes extends BaseDL4JTest { @Test public void testGlobalLocalConfig() { for (ConvolutionMode cm : new ConvolutionMode[] {ConvolutionMode.Strict, ConvolutionMode.Truncate}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(cm).list() .layer(0, new ConvolutionLayer.Builder().kernelSize(3, 
3).stride(3, 3).padding(0, 0) .nIn(3).nOut( @@ -258,7 +257,7 @@ public class TestConvolutionModes extends BaseDL4JTest { public void testGlobalLocalConfigCompGraph() { for (ConvolutionMode cm : new ConvolutionMode[] {ConvolutionMode.Strict, ConvolutionMode.Truncate, ConvolutionMode.Same}) { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(cm).graphBuilder().addInputs("in") .addLayer("0", new ConvolutionLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) .nIn(3).nOut( @@ -288,28 +287,28 @@ public class TestConvolutionModes extends BaseDL4JTest { .activation(Activation.SOFTMAX).nOut(3).build(), "7") .setOutputs("8").build(); - assertEquals(cm, ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("0")).getLayerConf().getLayer()) + assertEquals(cm, ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("0")).getNetConfiguration().getFirstLayer()) .getConvolutionMode()); assertEquals(ConvolutionMode.Strict, - ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("1")).getLayerConf().getLayer()) + ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("1")).getNetConfiguration().getFirstLayer()) .getConvolutionMode()); assertEquals(ConvolutionMode.Truncate, - ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("2")).getLayerConf().getLayer()) + ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("2")).getNetConfiguration().getFirstLayer()) .getConvolutionMode()); assertEquals(ConvolutionMode.Same, - ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("3")).getLayerConf().getLayer()) + ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("3")).getNetConfiguration().getFirstLayer()) .getConvolutionMode()); - assertEquals(cm, ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("4")).getLayerConf().getLayer()) + assertEquals(cm, ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("4")).getNetConfiguration().getFirstLayer()) .getConvolutionMode()); assertEquals(ConvolutionMode.Strict, - ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("5")).getLayerConf().getLayer()) + ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("5")).getNetConfiguration().getFirstLayer()) .getConvolutionMode()); assertEquals(ConvolutionMode.Truncate, - ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("6")).getLayerConf().getLayer()) + ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("6")).getNetConfiguration().getFirstLayer()) .getConvolutionMode()); assertEquals(ConvolutionMode.Same, - ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("7")).getLayerConf().getLayer()) + ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("7")).getNetConfiguration().getFirstLayer()) .getConvolutionMode()); } } @@ -437,15 +436,15 @@ public class TestConvolutionModes extends BaseDL4JTest { int kH = 3; int kW = 3; - Layer[] l = new Layer[2]; + LayerConfiguration[] l = new LayerConfiguration[2]; l[0] = new ConvolutionLayer.Builder().nOut(4).kernelSize(kH, kW).stride(sH, sW).build(); l[1] = new SubsamplingLayer.Builder().kernelSize(kH, kW).stride(sH, sW).build(); for (int i = 0; i < l.length; i++) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().convolutionMode(ConvolutionMode.Same) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().convolutionMode(ConvolutionMode.Same) .list().layer(0, l[i]).layer(1, new 
OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(inH, inW, inDepth)).build(); + .inputType(InputType.convolutional(inH, inW, inDepth)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java index 277b43c31..5d74b94fa 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java @@ -36,8 +36,6 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.common.primitives.Pair; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import java.util.Arrays; - import static org.junit.jupiter.api.Assertions.*; /** @@ -106,10 +104,10 @@ public class Upsampling1DTest extends BaseDL4JTest { private Layer getUpsampling1DLayer() { - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) .layer(new Upsampling1D.Builder(size).build()).build(); - return conf.getLayer().instantiate(conf, null, 0, + return conf.getFirstLayer().instantiate(conf, null, 0, null, true, Nd4j.defaultFloatingPointType()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java index e1d46f911..bfb872ba8 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java @@ -36,8 +36,6 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.common.primitives.Pair; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import java.util.Arrays; - import static org.junit.jupiter.api.Assertions.*; /** @@ -110,10 +108,10 @@ public class Upsampling2DTest extends BaseDL4JTest { private Layer getUpsamplingLayer() { - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) .layer(new Upsampling2D.Builder(size).build()).build(); - return conf.getLayer().instantiate(conf, null, 0, null, true, Nd4j.defaultFloatingPointType()); + return conf.getFirstLayer().instantiate(conf, null, 0, null, true, Nd4j.defaultFloatingPointType()); } public INDArray getData() throws Exception { diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java index 2f837fc2f..94994ea47 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java @@ -21,21 +21,14 @@ package org.deeplearning4j.nn.layers.custom; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import 
org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.layers.custom.testclasses.CustomActivation; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.introspect.AnnotatedClass; -import com.fasterxml.jackson.databind.jsontype.NamedType; - -import java.util.Collection; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -46,7 +39,7 @@ public class TestCustomActivation extends BaseDL4JTest { public void testCustomActivationFn() { //Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config actually works... - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).activation(new CustomActivation()).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) .build(); @@ -56,10 +49,10 @@ public class TestCustomActivation extends BaseDL4JTest { // System.out.println(json); - MultiLayerConfiguration confFromJson = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration confFromJson = NeuralNetConfiguration.fromJson(json); assertEquals(conf, confFromJson); - MultiLayerConfiguration confFromYaml = MultiLayerConfiguration.fromYaml(yaml); + NeuralNetConfiguration confFromYaml = NeuralNetConfiguration.fromYaml(yaml); assertEquals(conf, confFromYaml); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java index a0de7f2df..4ef8fab18 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java @@ -22,10 +22,8 @@ package org.deeplearning4j.nn.layers.custom; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.Layer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.custom.testclasses.CustomLayer; @@ -39,13 +37,6 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.introspect.AnnotatedClass; -import com.fasterxml.jackson.databind.jsontype.NamedType; - -import java.util.Collection; -import java.util.HashSet; -import java.util.Set; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -54,8 +45,8 @@ public class TestCustomLayers extends BaseDL4JTest { @Test public void 
testJsonMultiLayerNetwork() { - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new CustomLayer(3.14159)).layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -67,10 +58,10 @@ public class TestCustomLayers extends BaseDL4JTest { // System.out.println(json); - MultiLayerConfiguration confFromJson = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration confFromJson = NeuralNetConfiguration.fromJson(json); assertEquals(conf, confFromJson); - MultiLayerConfiguration confFromYaml = MultiLayerConfiguration.fromYaml(yaml); + NeuralNetConfiguration confFromYaml = NeuralNetConfiguration.fromYaml(yaml); assertEquals(conf, confFromYaml); } @@ -78,7 +69,7 @@ public class TestCustomLayers extends BaseDL4JTest { public void testJsonComputationGraph() { //ComputationGraph with a custom layer; check JSON and YAML config actually works... - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder() .addInputs("in").addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") .addLayer("1", new CustomLayer(3.14159), "0").addLayer("2", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) @@ -103,7 +94,7 @@ public class TestCustomLayers extends BaseDL4JTest { public void checkInitializationFF() { //Actually create a network with a custom layer; check initialization and forward pass - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new DenseLayer.Builder().nIn(9).nOut(10).build()).layer(1, new CustomLayer(3.14159)) //hard-coded nIn/nOut of 10 .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(11).build()) .build(); @@ -125,8 +116,8 @@ public class TestCustomLayers extends BaseDL4JTest { @Test public void testCustomOutputLayerMLN() { //Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config actually works... 
- MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(12345).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new CustomOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) @@ -138,10 +129,10 @@ public class TestCustomLayers extends BaseDL4JTest { // System.out.println(json); - MultiLayerConfiguration confFromJson = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration confFromJson = NeuralNetConfiguration.fromJson(json); assertEquals(conf, confFromJson); - MultiLayerConfiguration confFromYaml = MultiLayerConfiguration.fromYaml(yaml); + NeuralNetConfiguration confFromYaml = NeuralNetConfiguration.fromYaml(yaml); assertEquals(conf, confFromYaml); //Third: check initialization @@ -152,8 +143,8 @@ public class TestCustomLayers extends BaseDL4JTest { assertTrue(net.getLayer(1) instanceof CustomOutputLayerImpl); //Fourth: compare to an equivalent standard output layer (should be identical) - MultiLayerConfiguration conf2 = - new NeuralNetConfiguration.Builder().seed(12345).weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder().seed(12345).weightInit(WeightInit.XAVIER) .list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -178,7 +169,7 @@ public class TestCustomLayers extends BaseDL4JTest { @Test public void testCustomOutputLayerCG() { //Create a ComputationGraphConfiguration with custom output layer, and check JSON and YAML config actually works... - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1", new CustomOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10) @@ -205,7 +196,7 @@ public class TestCustomLayers extends BaseDL4JTest { assertTrue(net.getLayer(1) instanceof CustomOutputLayerImpl); //Fourth: compare to an equivalent standard output layer (should be identical) - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345) .graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(10) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomLayer.java index 1eacc4d20..f3b201d63 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomLayer.java @@ -57,9 +57,9 @@ public class CustomLayer extends FeedForwardLayer { ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(conf); return ret; } diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java index 88972c96a..b64a341d8 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java @@ -29,7 +29,6 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; -import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; @@ -56,9 +55,9 @@ public class CustomOutputLayer extends BaseOutputLayer { ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(conf); return ret; } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java index 25c8074a8..382476fc9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java @@ -24,7 +24,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -52,11 +51,11 @@ public class DenseTest extends BaseDL4JTest { public void testDenseBiasInit() { DenseLayer build = new DenseLayer.Builder().nIn(1).nOut(3).biasInit(1).build(); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(build).build(); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(build).build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true, Nd4j.defaultFloatingPointType()); + Layer layer = conf.getFirstLayer().instantiate(conf, null, 0, params, true, Nd4j.defaultFloatingPointType()); assertEquals(1, layer.getParam("b").size(0)); } @@ -124,7 +123,7 @@ public class DenseTest extends BaseDL4JTest { int outputNum = 3; long seed = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(seed) .updater(new Sgd(1e-3)).l1(0.3).l2(1e-3).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(numInputs).nOut(3) .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()) diff 
--git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java index 55c26b12b..60c4e3b0d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java @@ -25,7 +25,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -47,7 +46,6 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Random; @@ -60,7 +58,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { public void testEmbeddingLayerConfig() { for (boolean hasBias : new boolean[]{true, false}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() .layer(0, new EmbeddingLayer.Builder().hasBias(hasBias).nIn(10).nOut(5).build()) .layer(1, new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) .build(); @@ -71,8 +69,8 @@ public class EmbeddingLayerTest extends BaseDL4JTest { Layer l0 = net.getLayer(0); assertEquals(org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer.class, l0.getClass()); - assertEquals(10, ((FeedForwardLayer) l0.conf().getLayer()).getNIn()); - assertEquals(5, ((FeedForwardLayer) l0.conf().getLayer()).getNOut()); + assertEquals(10, ((FeedForwardLayer) l0.getLayerConfiguration()).getNIn()); + assertEquals(5, ((FeedForwardLayer) l0.getLayerConfiguration()).getNOut()); INDArray weights = l0.getParam(DefaultParamInitializer.WEIGHT_KEY); INDArray bias = l0.getParam(DefaultParamInitializer.BIAS_KEY); @@ -92,7 +90,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int nout = 4; for (boolean hasBias : new boolean[]{true, false}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() .layer(new EmbeddingSequenceLayer.Builder().hasBias(hasBias) .inputLength(inputLength).nIn(nIn).nOut(embeddingDim).build()) .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nout).activation(Activation.SOFTMAX).build()) @@ -104,8 +102,8 @@ public class EmbeddingLayerTest extends BaseDL4JTest { Layer l0 = net.getLayer(0); assertEquals(org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingSequenceLayer.class, l0.getClass()); - assertEquals(10, ((FeedForwardLayer) l0.conf().getLayer()).getNIn()); - assertEquals(5, ((FeedForwardLayer) l0.conf().getLayer()).getNOut()); + assertEquals(10, ((FeedForwardLayer) l0.getLayerConfiguration()).getNIn()); + assertEquals(5, ((FeedForwardLayer) l0.getLayerConfiguration()).getNOut()); INDArray weights = l0.getParam(DefaultParamInitializer.WEIGHT_KEY); INDArray bias = 
l0.getParam(DefaultParamInitializer.BIAS_KEY); @@ -124,7 +122,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int embeddingDim = 5; int nOut = 4; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() .layer(new EmbeddingSequenceLayer.Builder().inputLength(inputLength) .hasBias(true).nIn(nClassesIn).nOut(embeddingDim).build()) .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) @@ -155,12 +153,12 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int embeddingDim = 5; int nOut = 4; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() .layer(new EmbeddingSequenceLayer.Builder().inputLength(1) .hasBias(true).nIn(nClassesIn).nOut(embeddingDim).build()) .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) .build(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().activation(Activation.TANH).list() .layer(0, new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) .layer(1, new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) @@ -204,11 +202,11 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int nClassesIn = 10; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() .layer(0, new EmbeddingLayer.Builder().hasBias(true).nIn(nClassesIn).nOut(5).build()) .layer(1, new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) .build(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().activation(Activation.TANH).list() .layer(0, new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) .layer(1, new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) .build(); @@ -247,12 +245,12 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int nClassesIn = 10; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() .layer(0, new EmbeddingLayer.Builder().hasBias(true).nIn(nClassesIn).nOut(5).build()).layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4) .activation(Activation.SOFTMAX).build()) .build(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH) + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().activation(Activation.TANH) .weightInit(WeightInit.XAVIER).list() .layer(new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4) @@ -308,16 +306,16 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int nOut = 4; int inputLength = 1; - 
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() .layer(new EmbeddingSequenceLayer.Builder().inputLength(inputLength) .hasBias(true).nIn(nClassesIn).nOut(embeddingDim).build()) .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.recurrent(nClassesIn,inputLength,RNNFormat.NCW)) + .inputType(InputType.recurrent(nClassesIn,inputLength,RNNFormat.NCW)) .build(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().activation(Activation.TANH).list() .layer(new DenseLayer.Builder().nIn(nClassesIn).nOut(embeddingDim).activation(Activation.IDENTITY).build()) .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.recurrent(nClassesIn,inputLength,RNNFormat.NCW)) + .inputType(InputType.recurrent(nClassesIn,inputLength,RNNFormat.NCW)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -368,7 +366,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int batchSize = 3; int timeSeriesLength = 8; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH) .dataType(DataType.DOUBLE) .list() .layer(0, new EmbeddingLayer.Builder().hasBias(true).nIn(nClassesIn).nOut(5).build()) @@ -377,9 +375,9 @@ public class EmbeddingLayerTest extends BaseDL4JTest { .activation(Activation.SOFTMAX).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(1, new FeedForwardToRnnPreProcessor()) - .setInputType(InputType.recurrent(nClassesIn,timeSeriesLength, RNNFormat.NCW)) + .inputType(InputType.recurrent(nClassesIn,timeSeriesLength, RNNFormat.NCW)) .build(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH) + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().activation(Activation.TANH) .weightInit(WeightInit.XAVIER) .dataType(DataType.DOUBLE) .list() @@ -389,7 +387,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { .activation(Activation.SOFTMAX).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(1, new FeedForwardToRnnPreProcessor()) - .setInputType(InputType.recurrent(nClassesIn,timeSeriesLength, RNNFormat.NCW)) + .inputType(InputType.recurrent(nClassesIn,timeSeriesLength, RNNFormat.NCW)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -452,7 +450,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { for (int nExamples : miniBatchSizes) { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new EmbeddingLayer.Builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses) @@ -463,13 +461,13 @@ public class EmbeddingLayerTest extends BaseDL4JTest { .nOut(4).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(2, new FeedForwardToRnnPreProcessor()) - .setInputType(InputType.recurrent(numInputClasses,timeSeriesLength, 
RNNFormat.NCW)) + .inputType(InputType.recurrent(numInputClasses,timeSeriesLength, RNNFormat.NCW)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5) @@ -480,7 +478,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { .nOut(4).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(2, new FeedForwardToRnnPreProcessor()) - .setInputType(InputType.recurrent(numInputClasses,timeSeriesLength, RNNFormat.NCW)) + .inputType(InputType.recurrent(numInputClasses,timeSeriesLength, RNNFormat.NCW)) .build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); @@ -553,7 +551,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { el = new EmbeddingLayer.Builder().weightInit(new WordVectorsMockup()).build(); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345).list() .layer(el) .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(3).nOut(3).build()) @@ -577,7 +575,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { esl = new EmbeddingSequenceLayer.Builder().weightInit(new WordVectorsMockup()).build(); } - conf = new NeuralNetConfiguration.Builder() + conf = NeuralNetConfiguration.builder() .seed(12345).list() .layer(esl) .layer(new GlobalPoolingLayer()) @@ -614,7 +612,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { for (int nExamples : miniBatchSizes) { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new EmbeddingSequenceLayer.Builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses) @@ -623,12 +621,12 @@ public class EmbeddingLayerTest extends BaseDL4JTest { .layer(2, new LSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()) .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) - .setInputType(InputType.recurrent(numInputClasses,timeSeriesLength,RNNFormat.NCW)).build(); + .inputType(InputType.recurrent(numInputClasses,timeSeriesLength,RNNFormat.NCW)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5) @@ -637,7 +635,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { .layer(2, new LSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).dataFormat(RNNFormat.NCW).build()) .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) - .setInputType(InputType.recurrent(numInputClasses,1,RNNFormat.NCW)).build(); + .inputType(InputType.recurrent(numInputClasses,1,RNNFormat.NCW)).build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); 
@@ -722,7 +720,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { @Test public void testEmbeddingDefaultActivation(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new EmbeddingLayer.Builder().nIn(10).nOut(10).build()) .layer(new EmbeddingSequenceLayer.Builder().nIn(10).nOut(10).build()) @@ -747,7 +745,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { for (boolean seq : new boolean[]{false, true}) { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .list() .layer(seq ? @@ -758,7 +756,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net.init(); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .seed(12345) .list() .layer(seq ? @@ -769,7 +767,7 @@ public class EmbeddingLayerTest extends BaseDL4JTest { net2.init(); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf3 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf3 = NeuralNetConfiguration.builder() .seed(12345) .list() .layer(seq ? diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java index c4950d3c4..e6f85611a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java @@ -29,7 +29,6 @@ import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -129,14 +128,14 @@ public class BatchNormalizationTest extends BaseDL4JTest { b.lockGammaBeta(true).gamma(gamma).beta(beta); } BatchNormalization bN = b.build(); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(bN).build(); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(bN).build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = null; if (numParams > 0) { params = Nd4j.create(1, numParams); } - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true, params == null ? Nd4j.defaultFloatingPointType() : params.dataType()); + Layer layer = conf.getFirstLayer().instantiate(conf, null, 0, params, true, params == null ? 
Nd4j.defaultFloatingPointType() : params.dataType()); if (numParams > 0) { layer.setBackpropGradientsViewArray(Nd4j.create(1, numParams)); } @@ -365,7 +364,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { DataSet next = iter.next(); // Run with separate activation layer - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10).weightInit(WeightInit.XAVIER) @@ -397,7 +396,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { DataSetIterator iter = new MnistDataSetIterator(2, 2); DataSet next = iter.next(); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) @@ -406,7 +405,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { .layer(2, new ActivationLayer.Builder().activation(Activation.RELU).build()) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); @@ -422,7 +421,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { //Serialize the batch norm network (after training), and make sure we get same activations out as before // i.e., make sure state is properly stored - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .list() .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) @@ -433,7 +432,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { .layer(4, new BatchNormalization.Builder().build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -461,9 +460,9 @@ public class BatchNormalizationTest extends BaseDL4JTest { public void testGradientAndUpdaters() throws Exception { //Global mean/variance are part of the parameter vector. 
Expect 0 gradient, and no-op updater for these - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.RMSPROP).seed(12345).list() + .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).seed(12345).list() .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).build()) .layer(1, new BatchNormalization.Builder().build()) @@ -472,7 +471,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { .layer(4, new BatchNormalization.Builder().build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -519,9 +518,9 @@ public class BatchNormalizationTest extends BaseDL4JTest { for(boolean useLogStd : new boolean[]{true, false}) { //First, Mnist data as 2d input (NOT taking into account convolution property) - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.RMSPROP).seed(12345) + .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).seed(12345) .list().layer(0, new BatchNormalization.Builder().nIn(10).nOut(10).eps(1e-5).decay(0.95) .useLogStd(useLogStd).build()) @@ -586,13 +585,13 @@ public class BatchNormalizationTest extends BaseDL4JTest { //Check that the internal global mean/variance estimate is approximately correct //First, Mnist data as 2d input (NOT taking into account convolution property) - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.RMSPROP).seed(12345).list() + .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).seed(12345).list() .layer(0, new BatchNormalization.Builder().nIn(3).nOut(3).eps(1e-5).decay(0.95).useLogStd(useLogStd).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).nOut(10).build()) - .setInputType(InputType.convolutional(5, 5, 3)).build(); + .inputType(InputType.convolutional(5, 5, 3)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -649,24 +648,24 @@ public class BatchNormalizationTest extends BaseDL4JTest { //Check that the internal global mean/variance estimate is approximately correct //First, Mnist data as 2d input (NOT taking into account convolution property) - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.RMSPROP).seed(12345).list() + .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).seed(12345).list() .layer(0, new BatchNormalization.Builder().nIn(3).nOut(3).eps(1e-5).decay(0.95).useLogStd(false).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).nOut(10).build()) - .setInputType(InputType.convolutional(5, 5, 
3)).build(); + .inputType(InputType.convolutional(5, 5, 3)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.RMSPROP).seed(12345).list() + .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).seed(12345).list() .layer(0, new BatchNormalization.Builder().nIn(3).nOut(3).eps(1e-5).decay(0.95).useLogStd(true).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).nOut(10).build()) - .setInputType(InputType.convolutional(5, 5, 3)).build(); + .inputType(InputType.convolutional(5, 5, 3)).build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); @@ -691,7 +690,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { @Test public void testBatchNorm() throws Exception { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .updater(new Adam(1e-3)) .activation(Activation.TANH) @@ -700,7 +699,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { .layer(new BatchNormalization()) .layer(new ConvolutionLayer.Builder().nOut(5).kernelSize(2, 2).build()) .layer(new OutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nOut(10).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)) + .inputType(InputType.convolutionalFlat(28, 28, 1)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -728,7 +727,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { for (boolean rnn : new boolean[]{true, false}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same) @@ -737,7 +736,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { new Convolution1DLayer.Builder().kernelSize(3).stride(1).nOut(3).build()) .layer(new BatchNormalization()) .layer(new RnnOutputLayer.Builder().nOut(3).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build()) - .setInputType(InputType.recurrent(3)) + .inputType(InputType.recurrent(3)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -757,7 +756,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { @Test public void testInputValidation() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new BatchNormalization.Builder().nIn(10).nOut(10).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java index e876b736b..c2f8cb3c4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java @@ -25,7 +25,6 @@ import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.Layer; import 
org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -110,7 +109,7 @@ public class LocalResponseTest extends BaseDL4JTest { @BeforeEach public void doBefore() { - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) .layer(new LocalResponseNormalization.Builder().k(2).n(5).alpha(1e-4).beta(0.75).build()) .build(); @@ -140,7 +139,7 @@ public class LocalResponseTest extends BaseDL4JTest { public void testRegularization() { // Confirm a structure with regularization true will not throw an error - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).l1(0.2) .l2(0.1).seed(123) .layer(new LocalResponseNormalization.Builder().k(2).n(5).alpha(1e-4).beta(0.75).build()) @@ -149,7 +148,7 @@ public class LocalResponseTest extends BaseDL4JTest { @Test public void testMultiCNNLayer() throws Exception { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(123).list() .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) @@ -159,7 +158,7 @@ public class LocalResponseTest extends BaseDL4JTest { .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(10) .build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); @@ -203,7 +202,7 @@ public class LocalResponseTest extends BaseDL4JTest { } LocalResponseNormalization lrn = new LocalResponseNormalization.Builder().build(); - NeuralNetConfiguration nnc = new NeuralNetConfiguration.Builder().layer(lrn).build(); + NeuralNetConfiguration nnc = NeuralNetConfiguration.builder().layer(lrn).build(); org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization layer = (org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization) lrn.instantiate(nnc, null, 0, null, false, Nd4j.defaultFloatingPointType()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java index c732ab366..558041072 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java @@ -34,7 +34,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import 
org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -88,7 +87,7 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { INDArray bbPrior = Nd4j.rand(b, 2).muliRowVector(Nd4j.create(new double[]{w, h})); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .l2(0.01) .list() .layer(new ConvolutionLayer.Builder().nIn(depth).nOut(depth).kernelSize(1,1).build()) @@ -177,7 +176,7 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { INDArray bbPrior = Nd4j.rand(b, 2).muliRowVector(Nd4j.create(new double[]{w, h})); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new ConvolutionLayer.Builder().nIn(1).nOut(1).kernelSize(1,1).build()) .layer(new Yolo2OutputLayer.Builder() @@ -335,7 +334,7 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { //Check IOU calculation - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(1,1).nIn(3).nOut(3).build()) .layer(new Yolo2OutputLayer.Builder() @@ -495,7 +494,7 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { DataSetIterator iter = new RecordReaderDataSetIterator(rr,1,1,1,true); iter.setPreProcessor(new ImagePreProcessingScaler()); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .convolutionMode(ConvolutionMode.Same) .updater(new Adam(2e-3)) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) @@ -510,7 +509,7 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { .layer(new Yolo2OutputLayer.Builder() .boundingBoxPriors(bbPriors) .build()) - .setInputType(InputType.convolutional(h,w,c)) + .inputType(InputType.convolutional(h,w,c)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java index 0eaa156f1..c989d0bf5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.layers.ocnn; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.gradientcheck.GradientCheckUtil; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -35,7 +34,6 @@ import org.junit.jupiter.api.io.TempDir; import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.activations.impl.ActivationReLU; import org.nd4j.linalg.activations.impl.ActivationSigmoid; -import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; @@ -43,10 +41,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize; 
import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Adam; -import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.learning.config.NoOp; -import org.nd4j.linalg.schedule.ScheduleType; -import org.nd4j.linalg.schedule.StepSchedule; import java.io.File; import java.util.UUID; @@ -128,7 +123,7 @@ public class OCNNOutputLayerTest extends BaseDL4JTest { DataSet filtered = next.filterBy(new int[]{0, 1}); for (int i = 0; i < 10; i++) { network.setEpochCount(i); - network.getLayerWiseConfigurations().setEpochCount(i); + network.getConfiguration().setEpochCount(i); network.fit(filtered); } @@ -170,7 +165,7 @@ public class OCNNOutputLayerTest extends BaseDL4JTest { private MultiLayerNetwork getSingleLayer() { int numHidden = 2; - MultiLayerConfiguration configuration = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration configuration = NeuralNetConfiguration.builder() .seed(12345) .weightInit(WeightInit.XAVIER) .miniBatch(true) @@ -182,8 +177,9 @@ public class OCNNOutputLayerTest extends BaseDL4JTest { // 1e-2, // 0.1, // 20)).build()) - .list(new DenseLayer.Builder().activation(new ActivationReLU()) - .nIn(4).nOut(2).build(), + .layer(new DenseLayer.Builder().activation(new ActivationReLU()) + .nIn(4).nOut(2).build()) + .layer( new org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer.Builder() .nIn(2).activation(new ActivationSigmoid()).initialRValue(0.1) .nu(0.1) @@ -197,10 +193,11 @@ public class OCNNOutputLayerTest extends BaseDL4JTest { public MultiLayerNetwork getGradientCheckNetwork(int numHidden) { - MultiLayerConfiguration configuration = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration configuration = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .seed(42).updater(new NoOp()).miniBatch(false) - .list(new DenseLayer.Builder().activation(new ActivationIdentity()).nIn(4).nOut(4).build(), + .layer(new DenseLayer.Builder().activation(new ActivationIdentity()).nIn(4).nOut(4).build()) + .layer( new org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer.Builder().nIn(4) .nu(0.002).activation(new ActivationSigmoid()) .hiddenLayerSize(numHidden).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java index a7f3d1867..86d695f3d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.layers.pooling; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.*; @@ -59,7 +58,7 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { for (int miniBatchSize : minibatchSizes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new NoOp()) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) @@ -123,7 +122,7 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { new PoolingType[] 
{PoolingType.SUM, PoolingType.AVG, PoolingType.MAX, PoolingType.PNORM}; for (PoolingType pt : poolingTypes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(height, 2) .stride(height, 1).activation(Activation.TANH).build()) @@ -186,7 +185,7 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { new PoolingType[] {PoolingType.SUM, PoolingType.AVG, PoolingType.MAX, PoolingType.PNORM}; for (PoolingType pt : poolingTypes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(2, width) .stride(1, width).activation(Activation.TANH).build()) @@ -250,7 +249,7 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { new PoolingType[] {PoolingType.SUM, PoolingType.AVG, PoolingType.MAX, PoolingType.PNORM}; for (PoolingType pt : poolingTypes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(height, 2) .stride(height, 1).activation(Activation.TANH).build()) @@ -309,7 +308,7 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { new PoolingType[] {PoolingType.SUM, PoolingType.AVG, PoolingType.MAX, PoolingType.PNORM}; for (PoolingType pt : poolingTypes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(2, width) .stride(1, width).activation(Activation.TANH).build()) @@ -368,7 +367,7 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { new PoolingType[] {PoolingType.SUM, PoolingType.AVG, PoolingType.MAX, PoolingType.PNORM}; for (PoolingType pt : poolingTypes) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(2, 2) .stride(1, 1).activation(Activation.TANH).build()) @@ -434,7 +433,7 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { for(PoolingType pt : PoolingType.values()) { //System.out.println("Net: " + networkDtype + ", mask: " + dt + ", pt=" + pt); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new GlobalPoolingLayer(pt)) .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build()) @@ -447,7 +446,7 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { net.output(in, 
false, mask, null); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .list() .layer(new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java index e785b36e5..8e329077c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java @@ -85,7 +85,7 @@ public class BidirectionalTest extends BaseDL4JTest { //Bidirectional(GravesLSTM) and GravesBidirectionalLSTM should be equivalent, given equivalent params //Note that GravesBidirectionalLSTM implements ADD mode only - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) .trainingWorkspaceMode(wsm) @@ -98,7 +98,7 @@ public class BidirectionalTest extends BaseDL4JTest { .nIn(10).nOut(10).build()) .build(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) .trainingWorkspaceMode(wsm) @@ -189,7 +189,7 @@ public class BidirectionalTest extends BaseDL4JTest { //Bidirectional(GravesLSTM) and GravesBidirectionalLSTM should be equivalent, given equivalent params //Note that GravesBidirectionalLSTM implements ADD mode only - ComputationGraphConfiguration conf1 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) .updater(new Adam()) @@ -204,7 +204,7 @@ public class BidirectionalTest extends BaseDL4JTest { .setOutputs("2") .build(); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) .updater(new Adam()) @@ -288,7 +288,7 @@ public class BidirectionalTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) .trainingWorkspaceMode(wsm) @@ -354,7 +354,7 @@ public class BidirectionalTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf1 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) .trainingWorkspaceMode(wsm) @@ -422,7 +422,7 @@ public class BidirectionalTest extends BaseDL4JTest { INDArray in = Nd4j.rand(inshape); for (Bidirectional.Mode m : modes) { - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) @@ -436,7 +436,7 @@ public class BidirectionalTest extends BaseDL4JTest { MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); net1.init(); - 
MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) @@ -548,7 +548,7 @@ public class BidirectionalTest extends BaseDL4JTest { for (Bidirectional.Mode m : modes) { - ComputationGraphConfiguration conf1 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) @@ -564,7 +564,7 @@ public class BidirectionalTest extends BaseDL4JTest { ComputationGraph net1 = new ComputationGraph(conf1); net1.init(); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) @@ -680,7 +680,7 @@ public class BidirectionalTest extends BaseDL4JTest { int in = 2; int out = 2; - ComputationGraphConfiguration.GraphBuilder builder = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration.GraphBuilder builder = NeuralNetConfiguration.builder() .updater(new Adam(0.01)) .activation(Activation.RELU) .graphBuilder() diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java index bd1291216..d51fc5280 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java @@ -24,7 +24,6 @@ import lombok.val; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.CacheMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; @@ -64,15 +63,15 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { int nIn = 13; int nHiddenUnits = 17; - final NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + final NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn) .nOut(nHiddenUnits).dataFormat(rnnDataFormat).activation(Activation.TANH).build()) .build(); - val numParams = conf.getLayer().initializer().numParams(conf); + val numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); final GravesBidirectionalLSTM layer = - (GravesBidirectionalLSTM) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + (GravesBidirectionalLSTM) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); //Data: has shape [miniBatchSize,nIn,timeSeriesLength]; //Output/activations has shape [miniBatchsize,nHiddenUnits,timeSeriesLength]; @@ -130,17 +129,17 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { INDArray inputData = (rnnDataFormat == RNNFormat.NCW)?Nd4j.ones(miniBatchSize, nIn, timeSeriesLength): Nd4j.ones(miniBatchSize, timeSeriesLength, nIn); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + 
NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn) .nOut(lstmNHiddenUnits).dataFormat(rnnDataFormat) .dist(new UniformDistribution(0, 1)).activation(Activation.TANH).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); GravesBidirectionalLSTM lstm = - (GravesBidirectionalLSTM) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); - lstm.setBackpropGradientsViewArray(Nd4j.create(1, conf.getLayer().initializer().numParams(conf))); + (GravesBidirectionalLSTM) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); + lstm.setBackpropGradientsViewArray(Nd4j.create(1, conf.getFirstLayer().initializer().numParams(conf))); //Set input, do a forward pass: lstm.activate(inputData, false, LayerWorkspaceMgr.noWorkspaces()); assertNotNull(lstm.input()); @@ -202,21 +201,21 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { final int miniBatchSize = 4; final int timeSeriesLength = 7; - final NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + final NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn) .nOut(layerSize) .dist(new UniformDistribution(0, 1)).activation(Activation.TANH).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); final GravesBidirectionalLSTM lstm = - (GravesBidirectionalLSTM) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + (GravesBidirectionalLSTM) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); final INDArray input = Nd4j.rand(miniBatchSize, nIn, timeSeriesLength); lstm.setInput(input, LayerWorkspaceMgr.noWorkspaces()); - final INDArray fwdPassFalse = LSTMHelpers.activateHelper(lstm, lstm.conf(), new ActivationSigmoid(), + final INDArray fwdPassFalse = LSTMHelpers.activateHelper(lstm, lstm.getNetConfiguration(), new ActivationSigmoid(), lstm.input(), lstm.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS), lstm.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), @@ -224,7 +223,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { false, true, GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, null, true, null, CacheMode.NONE, LayerWorkspaceMgr.noWorkspaces(), true).fwdPassOutput; - final INDArray[] fwdPassTrue = LSTMHelpers.activateHelper(lstm, lstm.conf(), new ActivationSigmoid(), + final INDArray[] fwdPassTrue = LSTMHelpers.activateHelper(lstm, lstm.getNetConfiguration(), new ActivationSigmoid(), lstm.input(), lstm.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS), lstm.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), @@ -260,16 +259,16 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); - final NeuralNetConfiguration confBidirectional = new NeuralNetConfiguration.Builder() + final NeuralNetConfiguration confBidirectional = NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn) 
.nOut(layerSize).dataFormat(rnnDataFormat) .dist(new UniformDistribution(-0.1, 0.1)).activation(Activation.TANH).build()) .build(); - long numParams = confBidirectional.getLayer().initializer().numParams(confBidirectional); + long numParams = confBidirectional.getFirstLayer().initializer().numParams(confBidirectional); INDArray params = Nd4j.create(1, numParams); - final GravesBidirectionalLSTM bidirectionalLSTM = (GravesBidirectionalLSTM) confBidirectional.getLayer() + final GravesBidirectionalLSTM bidirectionalLSTM = (GravesBidirectionalLSTM) confBidirectional.getFirstLayer() .instantiate(confBidirectional, null, 0, params, true, params.dataType()); @@ -280,7 +279,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { params = bidirectionalLSTM.params(); - bidirectionalLSTM.setParams(params); + bidirectionalLSTM.setParamsTable(params); final INDArray act2 = bidirectionalLSTM.activate(sig, false, LayerWorkspaceMgr.noWorkspaces()); @@ -300,31 +299,31 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); final NeuralNetConfiguration confBidirectional = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder() .nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat) .dist(new UniformDistribution(-0.1, 0.1)) .activation(Activation.TANH).updater(new NoOp()).build()) .build(); - final NeuralNetConfiguration confForwards = new NeuralNetConfiguration.Builder() + final NeuralNetConfiguration confForwards = NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat) .weightInit(WeightInit.ZERO).activation(Activation.TANH).build()) .build(); - long numParams = confForwards.getLayer().initializer().numParams(confForwards); + long numParams = confForwards.getFirstLayer().initializer().numParams(confForwards); INDArray params = Nd4j.create(1, numParams); - long numParamsBD = confBidirectional.getLayer().initializer().numParams(confBidirectional); + long numParamsBD = confBidirectional.getFirstLayer().initializer().numParams(confBidirectional); INDArray paramsBD = Nd4j.create(1, numParamsBD); - final GravesBidirectionalLSTM bidirectionalLSTM = (GravesBidirectionalLSTM) confBidirectional.getLayer() + final GravesBidirectionalLSTM bidirectionalLSTM = (GravesBidirectionalLSTM) confBidirectional.getFirstLayer() .instantiate(confBidirectional, null, 0, paramsBD, true, params.dataType()); final GravesLSTM forwardsLSTM = - (GravesLSTM) confForwards.getLayer().instantiate(confForwards, null, 0, params, true, params.dataType()); + (GravesLSTM) confForwards.getFirstLayer().instantiate(confForwards, null, 0, params, true, params.dataType()); bidirectionalLSTM.setBackpropGradientsViewArray( - Nd4j.create(1, confBidirectional.getLayer().initializer().numParams(confBidirectional))); + Nd4j.create(1, confBidirectional.getFirstLayer().initializer().numParams(confBidirectional))); forwardsLSTM.setBackpropGradientsViewArray( - Nd4j.create(1, confForwards.getLayer().initializer().numParams(confForwards))); + Nd4j.create(1, confForwards.getFirstLayer().initializer().numParams(confForwards))); final INDArray sig = (rnnDataFormat == RNNFormat.NCW)?Nd4j.rand(miniBatchSize, nIn, timeSeriesLength): @@ -501,7 +500,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { @Test public void testSerialization() { - final MultiLayerConfiguration conf1 = new 
NeuralNetConfiguration.Builder() + final NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new AdaGrad(0.1)) .l2(0.001) @@ -520,7 +519,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { final String json1 = conf1.toJson(); - final MultiLayerConfiguration conf2 = MultiLayerConfiguration.fromJson(json1); + final NeuralNetConfiguration conf2 = NeuralNetConfiguration.fromJson(json1); final String json2 = conf1.toJson(); @@ -532,7 +531,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { public void testGateActivationFnsSanityCheck() { for (String gateAfn : new String[] {"sigmoid", "hardsigmoid"}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .seed(12345).list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder() @@ -546,8 +545,8 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(gateAfn, ((org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) net.getLayer(0).conf() - .getLayer()).getGateActivationFn().toString()); + assertEquals(gateAfn, ((org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) net.getLayer(0).getNetConfiguration() + .getFirstLayer()).getGateActivationFn().toString()); INDArray in = Nd4j.rand(3, 2, 5); INDArray labels = Nd4j.rand(3, 2, 5); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java index 679066755..2868c08d8 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java @@ -24,7 +24,6 @@ import lombok.val; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.common.config.DL4JClassLoading; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.gradient.Gradient; @@ -59,14 +58,14 @@ public class GravesLSTMTest extends BaseDL4JTest { int nHiddenUnits = 17; NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn) .nOut(nHiddenUnits).activation(Activation.TANH).build()) .build(); - val numParams = conf.getLayer().initializer().numParams(conf); + val numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - GravesLSTM layer = (GravesLSTM) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + GravesLSTM layer = (GravesLSTM) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); //Data: has shape [miniBatchSize,nIn,timeSeriesLength]; //Output/activations has shape [miniBatchsize,nHiddenUnits,timeSeriesLength]; @@ -104,16 +103,16 @@ public class GravesLSTMTest extends BaseDL4JTest { INDArray inputData = Nd4j.ones(miniBatchSize, nIn, timeSeriesLength); - NeuralNetConfiguration conf = new 
NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn) .nOut(lstmNHiddenUnits) .dist(new UniformDistribution(0, 1)).activation(Activation.TANH).build()) .build(); - val numParams = conf.getLayer().initializer().numParams(conf); + val numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - GravesLSTM lstm = (GravesLSTM) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); - lstm.setBackpropGradientsViewArray(Nd4j.create(1, conf.getLayer().initializer().numParams(conf))); + GravesLSTM lstm = (GravesLSTM) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); + lstm.setBackpropGradientsViewArray(Nd4j.create(1, conf.getFirstLayer().initializer().numParams(conf))); //Set input, do a forward pass: lstm.activate(inputData, false, LayerWorkspaceMgr.noWorkspaces()); assertNotNull(lstm.input()); @@ -155,15 +154,15 @@ public class GravesLSTMTest extends BaseDL4JTest { int miniBatchSize = 4; int timeSeriesLength = 7; - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(layerSize) .dist(new UniformDistribution(0, 1)) .activation(Activation.TANH).build()) .build(); - val numParams = conf.getLayer().initializer().numParams(conf); + val numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - GravesLSTM lstm = (GravesLSTM) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + GravesLSTM lstm = (GravesLSTM) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); INDArray input = Nd4j.rand(miniBatchSize, nIn, timeSeriesLength); lstm.setInput(input, LayerWorkspaceMgr.noWorkspaces()); @@ -197,7 +196,7 @@ public class GravesLSTMTest extends BaseDL4JTest { public void testSingleExample() { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().activation(Activation.TANH) @@ -254,7 +253,7 @@ public class GravesLSTMTest extends BaseDL4JTest { public void testGateActivationFnsSanityCheck() { for (String gateAfn : new String[] {"sigmoid", "hardsigmoid"}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .seed(12345).list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder() @@ -268,7 +267,7 @@ public class GravesLSTMTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(gateAfn, ((org.deeplearning4j.nn.conf.layers.GravesLSTM) net.getLayer(0).conf().getLayer()) + assertEquals(gateAfn, ((org.deeplearning4j.nn.conf.layers.GravesLSTM) net.getLayer(0).getLayerConfiguration()) .getGateActivationFn().toString()); INDArray in = Nd4j.rand(3, 2, 5); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayerTest.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayerTest.java index f1fa71ab2..1a3bcbc65 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayerTest.java @@ -23,13 +23,11 @@ package org.deeplearning4j.nn.layers.recurrent; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.layers.LSTM; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.optimize.api.TrainingListener; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; @@ -71,7 +69,7 @@ public class MaskZeroLayerTest extends BaseDL4JTest { .nIn(2) .nOut(1).dataFormat(rnnDataFormat) .build(); - NeuralNetConfiguration conf = new NeuralNetConfiguration(); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().build(); conf.setLayer(underlying); INDArray params = Nd4j.zeros(1, 16); @@ -108,7 +106,7 @@ public class MaskZeroLayerTest extends BaseDL4JTest { @Test public void testSerialization(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer.Builder() .setMaskValue(0.0).setUnderlying(new LSTM.Builder().nIn(4).nOut(5).dataFormat(rnnDataFormat).build()).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java index 2b5280339..c6b315cb5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java @@ -238,14 +238,14 @@ public class RnnDataFormatTests extends BaseDL4JTest { return getNetWithLayer(new SimpleRnn.Builder().nOut(3).build(), format, lastTimeStep, maskZeros); } } - private MultiLayerNetwork getNetWithLayer(Layer layer, RNNFormat format, boolean lastTimeStep, boolean maskZeros) { + private MultiLayerNetwork getNetWithLayer(LayerConfiguration layer, RNNFormat format, boolean lastTimeStep, boolean maskZeros) { if (maskZeros){ layer = new MaskZeroLayer.Builder().setMaskValue(0.).setUnderlying(layer).build(); } if(lastTimeStep){ layer = new LastTimeStep(layer); } - NeuralNetConfiguration.ListBuilder builder = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = (NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder() .seed(12345) .list() .layer(new LSTM.Builder() @@ -260,7 +260,7 @@ public class RnnDataFormatTests extends BaseDL4JTest { (lastTimeStep)?new OutputLayer.Builder().activation(Activation.SOFTMAX).nOut(10).build(): new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).nOut(10).dataFormat(format).build() ) - .setInputType(InputType.recurrent(3, 12, format)); + .inputType(InputType.recurrent(3, 12, format)); MultiLayerNetwork net = new 
MultiLayerNetwork(builder.build()); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestLastTimeStepLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestLastTimeStepLayer.java index 4abcfa768..7755790e4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestLastTimeStepLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestLastTimeStepLayer.java @@ -62,7 +62,7 @@ public class TestLastTimeStepLayer extends BaseDL4JTest { @Test public void testLastTimeStepVertex() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("lastTS", new LastTimeStep(new SimpleRnn.Builder() .nIn(5).nOut(6).dataFormat(rnnDataFormat).build()), "in") .setOutputs("lastTS") @@ -124,7 +124,7 @@ public class TestLastTimeStepLayer extends BaseDL4JTest { @Test public void testMaskingAndAllMasked(){ - ComputationGraphConfiguration.GraphBuilder builder = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration.GraphBuilder builder = NeuralNetConfiguration.builder() .optimizationAlgo(STOCHASTIC_GRADIENT_DESCENT) .weightInit(XAVIER_UNIFORM) .activation(TANH) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java index 951680ca7..3ea9cdbdb 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java @@ -40,7 +40,7 @@ public class TestRecurrentWeightInit extends BaseDL4JTest { for (boolean rwInit : new boolean[]{false, true}) { for (int i = 0; i < 3; i++) { - NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.NeuralNetConfigurationBuilder b = NeuralNetConfiguration.builder() .weightInit(new UniformDistribution(0, 1)) .list(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java index b5fd0ac57..e2b6bc359 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java @@ -22,13 +22,13 @@ package org.deeplearning4j.nn.layers.recurrent; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.dropout.TestDropout; import org.deeplearning4j.nn.conf.layers.GravesLSTM; import org.deeplearning4j.nn.conf.layers.LSTM; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.RnnLossLayer; import org.deeplearning4j.nn.conf.layers.RnnOutputLayer; import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn; 
@@ -67,7 +67,7 @@ public class TestRnnLayers extends BaseDL4JTest { int nIn = 12; int nOut = 3; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .list() @@ -119,9 +119,9 @@ public class TestRnnLayers extends BaseDL4JTest { for(String s : layerTypes){ - Layer layer; - Layer layerD; - Layer layerD2; + LayerConfiguration layer; + LayerConfiguration layerD; + LayerConfiguration layerD2; TestDropout.CustomDropout cd = new TestDropout.CustomDropout(); switch (s){ case "graves": @@ -143,21 +143,21 @@ public class TestRnnLayers extends BaseDL4JTest { throw new RuntimeException(s); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .list() .layer(layer) .layer(new RnnOutputLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) .build(); - MultiLayerConfiguration confD = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration confD = NeuralNetConfiguration.builder() .seed(12345) .list() .layer(layerD) .layer(new RnnOutputLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) .build(); - MultiLayerConfiguration confD2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration confD2 = NeuralNetConfiguration.builder() .seed(12345) .list() .layer(layerD2) @@ -214,9 +214,9 @@ public class TestRnnLayers extends BaseDL4JTest { for( int i=0; i<2; i++ ){ - NeuralNetConfiguration.ListBuilder lb = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.NeuralNetConfigurationBuilder lb = NeuralNetConfiguration.builder() + - .list() .layer(new SimpleRnn.Builder().nIn(5).nOut(5).dataFormat(rnnDataFormat).build()); switch (i){ @@ -230,7 +230,7 @@ public class TestRnnLayers extends BaseDL4JTest { throw new RuntimeException(); } - MultiLayerConfiguration conf = lb.build(); + NeuralNetConfiguration conf = lb.build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestSimpleRnn.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestSimpleRnn.java index 9d77537c8..2abd86487 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestSimpleRnn.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestSimpleRnn.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.layers.recurrent; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn; @@ -68,7 +67,7 @@ public class TestSimpleRnn extends BaseDL4JTest { in = Nd4j.rand(DataType.FLOAT, m, tsLength, nIn); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .activation(Activation.TANH) @@ -126,7 +125,7 @@ public class TestSimpleRnn extends BaseDL4JTest { int nIn = 5; int layerSize = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + 
NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .activation(Activation.TANH) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java index 90a05de95..5a31cf4df 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.layers.recurrent; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.WorkspaceMode; @@ -62,7 +61,7 @@ public class TestTimeDistributed extends BaseDL4JTest { public void testTimeDistributed(){ for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) { - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() .trainingWorkspaceMode(wsm) .inferenceWorkspaceMode(wsm) .seed(12345) @@ -72,10 +71,10 @@ public class TestTimeDistributed extends BaseDL4JTest { .layer(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build()) .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).dataFormat(rnnDataFormat) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.recurrent(3, rnnDataFormat)) + .inputType(InputType.recurrent(3, rnnDataFormat)) .build(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .trainingWorkspaceMode(wsm) .inferenceWorkspaceMode(wsm) .seed(12345) @@ -85,7 +84,7 @@ public class TestTimeDistributed extends BaseDL4JTest { .layer(new TimeDistributed(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build(), rnnDataFormat)) .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).dataFormat(rnnDataFormat) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.recurrent(3, rnnDataFormat)) + .inputType(InputType.recurrent(3, rnnDataFormat)) .build(); MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); @@ -135,7 +134,7 @@ public class TestTimeDistributed extends BaseDL4JTest { for( int rnnType=0; rnnType<3; rnnType++ ) { for( int ffType=0; ffType<3; ffType++ ) { - Layer l0, l2; + LayerConfiguration l0, l2; switch (rnnType) { case 0: l0 = new LSTM.Builder().nOut(5).build(); @@ -153,7 +152,7 @@ public class TestTimeDistributed extends BaseDL4JTest { throw new RuntimeException("Not implemented: " + rnnType); } - Layer l1; + LayerConfiguration l1; switch (ffType){ case 0: l1 = new DenseLayer.Builder().nOut(5).build(); @@ -168,13 +167,13 @@ public class TestTimeDistributed extends BaseDL4JTest { throw new RuntimeException("Not implemented: " + ffType); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .list() .layer(l0) .layer(l1) .layer(l2) - .setInputType(InputType.recurrent(5, 9, rnnDataFormat)) + .inputType(InputType.recurrent(5, 9, rnnDataFormat)) .build(); 
BaseRecurrentLayer l0a; diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/SameDiffCustomLayerTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/SameDiffCustomLayerTests.java index 7b0f6c2cf..534af7bc2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/SameDiffCustomLayerTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/SameDiffCustomLayerTests.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.layers.samediff; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -75,10 +74,10 @@ public class SameDiffCustomLayerTests extends BaseDL4JTest { @Test public void testInputValidationSameDiffLayer() { - final MultiLayerConfiguration config = new NeuralNetConfiguration.Builder().list() + final NeuralNetConfiguration config = NeuralNetConfiguration.builder().list() .layer(new ValidatingSameDiffLayer()) .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(2).build()) - .setInputType(InputType.feedForward(2)) + .inputType(InputType.feedForward(2)) .build(); final MultiLayerNetwork net = new MultiLayerNetwork(config); @@ -95,7 +94,7 @@ public class SameDiffCustomLayerTests extends BaseDL4JTest { @Test public void testInputValidationSameDiffVertex(){ - final ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder().graphBuilder() + final ComputationGraphConfiguration config = NeuralNetConfiguration.builder().graphBuilder() .addVertex("a", new ValidatingSameDiffVertex(), "input") .addLayer("output", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(2).build(), "a") .addInputs("input") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java index f0d5d16ce..690c07f37 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java @@ -25,7 +25,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.gradientcheck.GradientCheckUtil; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -66,7 +65,7 @@ public class TestSameDiffConv extends BaseDL4JTest { int kH = 2; int kW = 3; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new SameDiffConv.Builder().nIn(nIn).nOut(nOut).kernelSize(kH, kW).build()) .build(); @@ -128,7 +127,7 @@ public class TestSameDiffConv extends BaseDL4JTest { + ", ConvolutionMode=" + cm + ", ActFn=" + a + ", hasBias=" + hasBias; log.info("Starting test: " + msg); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + 
NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .seed(12345) .list() @@ -159,9 +158,9 @@ public class TestSameDiffConv extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertNotNull(net.paramTable()); + assertNotNull(net.getParamTable()); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .weightInit(WeightInit.XAVIER) .seed(12345) @@ -193,8 +192,8 @@ public class TestSameDiffConv extends BaseDL4JTest { //Check params: note that samediff/libnd4j conv params are [kH, kW, iC, oC] //DL4J are [nOut, nIn, kH, kW] - Map params1 = net.paramTable(); - Map params2 = net2.paramTable(); + Map params1 = net.getParamTable(); + Map params2 = net2.getParamTable(); for(Map.Entry e : params1.entrySet()){ if(e.getKey().endsWith("_W")){ INDArray p1 = e.getValue(); @@ -267,7 +266,7 @@ public class TestSameDiffConv extends BaseDL4JTest { int outH = cm == ConvolutionMode.Same ? imgH : (imgH-2); int outW = cm == ConvolutionMode.Same ? imgW : (imgW-2); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .seed(12345) .updater(new NoOp()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java index 5e1949f8a..64d59c84b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java @@ -25,7 +25,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.gradientcheck.GradientCheckUtil; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -64,7 +63,7 @@ public class TestSameDiffDense extends BaseDL4JTest { int nIn = 3; int nOut = 4; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut).build()) .build(); @@ -103,7 +102,7 @@ public class TestSameDiffDense extends BaseDL4JTest { for (Activation a : afns) { log.info("Starting test - " + a + ", workspace = " + wsm); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .inferenceWorkspaceMode(wsm) .trainingWorkspaceMode(wsm) .list() @@ -115,9 +114,9 @@ public class TestSameDiffDense extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertNotNull(net.paramTable()); + assertNotNull(net.getParamTable()); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .list() .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) .build(); @@ -129,8 +128,8 @@ public class TestSameDiffDense extends BaseDL4JTest { //Check params: assertEquals(net2.params(), net.params()); - Map params1 = net.paramTable(); - Map params2 = 
net2.paramTable(); + Map params1 = net.getParamTable(); + Map params2 = net2.getParamTable(); assertEquals(params2, params1); INDArray in = Nd4j.rand(minibatch, nIn); @@ -176,7 +175,7 @@ public class TestSameDiffDense extends BaseDL4JTest { for (Activation a : afns) { log.info("Starting test - " + a + " - workspace=" + wsm); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .list() .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) @@ -194,9 +193,9 @@ public class TestSameDiffDense extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertNotNull(net.paramTable()); + assertNotNull(net.getParamTable()); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .seed(12345) .weightInit(WeightInit.XAVIER) .list() @@ -214,8 +213,8 @@ public class TestSameDiffDense extends BaseDL4JTest { //Check params: assertEquals(net2.params(), net.params()); - Map params1 = net.paramTable(); - Map params2 = net2.paramTable(); + Map params1 = net.getParamTable(); + Map params2 = net2.getParamTable(); assertEquals(params2, params1); INDArray in = Nd4j.rand(minibatch, nIn); @@ -264,7 +263,7 @@ public class TestSameDiffDense extends BaseDL4JTest { for (Activation a : afns) { log.info("Starting test - " + a + " - minibatch " + minibatch + ", workspaces: " + workspaces); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .list() @@ -278,7 +277,7 @@ public class TestSameDiffDense extends BaseDL4JTest { MultiLayerNetwork netSD = new MultiLayerNetwork(conf); netSD.init(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .list() .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) @@ -292,7 +291,7 @@ public class TestSameDiffDense extends BaseDL4JTest { //Check params: assertEquals(netStandard.params(), netSD.params()); - assertEquals(netStandard.paramTable(), netSD.paramTable()); + assertEquals(netStandard.getParamTable(), netSD.getParamTable()); INDArray in = Nd4j.rand(minibatch, nIn); INDArray l = TestUtils.randomOneHot(minibatch, nOut, 12345); @@ -352,7 +351,7 @@ public class TestSameDiffDense extends BaseDL4JTest { for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .trainingWorkspaceMode(wsm) .inferenceWorkspaceMode(wsm) @@ -367,7 +366,7 @@ public class TestSameDiffDense extends BaseDL4JTest { MultiLayerNetwork netSD = new MultiLayerNetwork(conf); netSD.init(); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .seed(12345) .updater(new Adam(0.1)) .list() @@ -384,7 +383,7 @@ public class TestSameDiffDense extends BaseDL4JTest { //Check params: assertEquals(netStandard.params(), netSD.params()); - assertEquals(netStandard.paramTable(), netSD.paramTable()); + assertEquals(netStandard.getParamTable(), 
netSD.getParamTable()); DataSetIterator iter = new IrisDataSetIterator(150, 150); DataSet ds = iter.next(); @@ -422,7 +421,7 @@ public class TestSameDiffDense extends BaseDL4JTest { String msg = "workspaces: " + workspaces + ", " + a; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .seed(12345) .updater(new NoOp()) @@ -433,7 +432,7 @@ public class TestSameDiffDense extends BaseDL4JTest { .layer(new SameDiffDense.Builder().nIn(nOut).nOut(nOut).activation(a).build()) .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - //.setInputType(InputType.feedForward(nIn)) //TODO + //.inputType(InputType.feedForward(nIn)) //TODO .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java index 630ec1231..f70c4de92 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java @@ -65,7 +65,7 @@ public class TestSameDiffDenseVertex extends BaseDL4JTest { for (Activation a : afns) { log.info("Starting test - " + a + " - minibatch " + minibatch + ", workspaces: " + workspaces); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) @@ -82,7 +82,7 @@ public class TestSameDiffDenseVertex extends BaseDL4JTest { ComputationGraph netSD = new ComputationGraph(conf); netSD.init(); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .inferenceWorkspaceMode(workspaces ? 
WorkspaceMode.ENABLED : WorkspaceMode.NONE) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java index 4afbc7e37..8da331f8e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java @@ -62,7 +62,7 @@ public class TestSameDiffLambda extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .trainingWorkspaceMode(wsm) .inferenceWorkspaceMode(wsm) .seed(12345) @@ -77,7 +77,7 @@ public class TestSameDiffLambda extends BaseDL4JTest { .build(); //Equavalent, not using SameDiff Lambda: - ComputationGraphConfiguration confStd = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration confStd = NeuralNetConfiguration.builder() .trainingWorkspaceMode(wsm) .inferenceWorkspaceMode(wsm) .seed(12345) @@ -143,7 +143,7 @@ public class TestSameDiffLambda extends BaseDL4JTest { log.info("--- Workspace Mode: {} ---", wsm); Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .trainingWorkspaceMode(wsm) .inferenceWorkspaceMode(wsm) .dataType(DataType.DOUBLE) @@ -160,7 +160,7 @@ public class TestSameDiffLambda extends BaseDL4JTest { .build(); //Equavalent, not using SameDiff Lambda: - ComputationGraphConfiguration confStd = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration confStd = NeuralNetConfiguration.builder() .trainingWorkspaceMode(wsm) .inferenceWorkspaceMode(wsm) .dataType(DataType.DOUBLE) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java index 2f0479b67..8ff1d6bc9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.layers.samediff; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.LossLayer; @@ -48,7 +47,7 @@ public class TestSameDiffOutput extends BaseDL4JTest { public void testOutputMSELossLayer(){ Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration confSD = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration confSD = NeuralNetConfiguration.builder() .seed(12345) .updater(new Adam(0.01)) .list() @@ -56,7 +55,7 @@ public class TestSameDiffOutput extends BaseDL4JTest { .layer(new SameDiffMSELossLayer()) .build(); - MultiLayerConfiguration confStd = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration confStd = NeuralNetConfiguration.builder() .seed(12345) .updater(new Adam(0.01)) .list() @@ -110,7 +109,7 @@ public class TestSameDiffOutput extends BaseDL4JTest { for(Activation a : new Activation[]{Activation.IDENTITY, Activation.TANH, 
Activation.SOFTMAX}) { log.info("Starting test: " + a); - MultiLayerConfiguration confSD = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration confSD = NeuralNetConfiguration.builder() .seed(12345) .updater(new Adam(0.01)) .list() @@ -118,7 +117,7 @@ public class TestSameDiffOutput extends BaseDL4JTest { .layer(new SameDiffMSEOutputLayer(5, 5, a, WeightInit.XAVIER)) .build(); - MultiLayerConfiguration confStd = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration confStd = NeuralNetConfiguration.builder() .seed(12345) .updater(new Adam(0.01)) .list() @@ -134,7 +133,7 @@ public class TestSameDiffOutput extends BaseDL4JTest { netSD.params().assign(netStd.params()); - assertEquals(netStd.paramTable(), netSD.paramTable()); + assertEquals(netStd.getParamTable(), netSD.getParamTable()); int minibatch = 2; INDArray in = Nd4j.rand(minibatch, 5); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java index 8864448b0..bc0677adb 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java @@ -94,5 +94,5 @@ public class MinimalSameDiffDense extends SameDiffLayer { //OPTIONAL methods: // public void setNIn(InputType inputType, boolean override) // public InputPreProcessor getPreProcessorForInputType(InputType inputType) -// public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) +// public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java index 0049696de..6fe2cf15e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java @@ -154,12 +154,13 @@ public class SameDiffConv extends SameDiffLayer { } @Override - public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { + public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { + NeuralNetConfiguration clone = globalConfig.clone().build(); if (activation == null) { - activation = SameDiffLayerUtils.fromIActivation(globalConfig.getActivationFn()); + activation = SameDiffLayerUtils.fromIActivation(clone.getActivationFn()); } if (cm == null) { - cm = globalConfig.getConvolutionMode(); + cm = clone.getConvolutionMode(); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java index 3595282c0..d0a176d63 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java @@ -116,9 +116,10 @@ public class SameDiffDense extends SameDiffLayer { } @Override - public void 
applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { + public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { + NeuralNetConfiguration clone = globalConfig.clone().build(); if(activation == null){ - activation = SameDiffLayerUtils.fromIActivation(globalConfig.getActivationFn()); + activation = SameDiffLayerUtils.fromIActivation(clone.getActivationFn()); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffMSEOutputLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffMSEOutputLayer.java index 41d149b3b..a93db0e56 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffMSEOutputLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffMSEOutputLayer.java @@ -85,7 +85,7 @@ public class SameDiffMSEOutputLayer extends SameDiffOutputLayer { } @Override - public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig){ + public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig){ } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java index f535c81fa..3da4abed5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java @@ -21,10 +21,10 @@ package org.deeplearning4j.nn.layers.variational; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder; import org.deeplearning4j.nn.conf.layers.variational.*; @@ -56,16 +56,16 @@ public class TestVAE extends BaseDL4JTest { @Test public void testInitialization() { - MultiLayerConfiguration mlc = - new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration mlc = + NeuralNetConfiguration.builder() .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() .nIn(10).nOut(5).encoderLayerSizes(12).decoderLayerSizes(13) .build()) .build(); - NeuralNetConfiguration c = mlc.getConf(0); + LayerConfiguration c = mlc.getFirstLayer(); org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder vae = - (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c.getLayer(); + (VariationalAutoencoder) c; long allParams = vae.initializer().numParams(c); @@ -94,14 +94,14 @@ public class TestVAE extends BaseDL4JTest { int[][] encLayerSizes = new int[][] {{12}, {12, 13}, {12, 13, 14}}; for (int i = 0; i < encLayerSizes.length; i++) { - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().list().layer(0, + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list().layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder().nIn(10) .nOut(5).encoderLayerSizes(encLayerSizes[i]).decoderLayerSizes(13).build()) .build(); - 
NeuralNetConfiguration c = mlc.getConf(0); + LayerConfiguration c = mlc.getConf(0); org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder vae = - (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c.getLayer(); + (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c; MultiLayerNetwork net = new MultiLayerNetwork(mlc); net.init(); @@ -120,14 +120,14 @@ public class TestVAE extends BaseDL4JTest { int inputSize = 3; - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list() .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() .nIn(inputSize).nOut(4).encoderLayerSizes(5).decoderLayerSizes(6).build()) .build(); - NeuralNetConfiguration c = mlc.getConf(0); + LayerConfiguration c = mlc.getConf(0); org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder vae = - (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c.getLayer(); + (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c; long allParams = vae.initializer().numParams(c); @@ -158,14 +158,14 @@ public class TestVAE extends BaseDL4JTest { @Test public void testParamGradientOrderAndViews() { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list() .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() .nIn(10).nOut(5).encoderLayerSizes(12, 13).decoderLayerSizes(14, 15).build()) .build(); - NeuralNetConfiguration c = mlc.getConf(0); + LayerConfiguration c = mlc.getConf(0); org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder vae = - (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c.getLayer(); + (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c; MultiLayerNetwork net = new MultiLayerNetwork(mlc); net.init(); @@ -216,16 +216,16 @@ public class TestVAE extends BaseDL4JTest { //Idea: pretrain-specific parameters shouldn't change during backprop Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().seed(12345).list() + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().seed(12345).list() .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() .nIn(10).nOut(5).encoderLayerSizes(12, 13).decoderLayerSizes(14, 15).build()) .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5).nOut(6) .activation(new ActivationTanH()).build()) .build(); - NeuralNetConfiguration c = mlc.getConf(0); + LayerConfiguration c = mlc.getConf(0); org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder vae = - (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c.getLayer(); + (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c; MultiLayerNetwork net = new MultiLayerNetwork(mlc); net.init(); @@ -268,7 +268,7 @@ public class TestVAE extends BaseDL4JTest { @Test public void testJsonYaml() { - MultiLayerConfiguration config = new NeuralNetConfiguration.Builder().seed(12345).list() + NeuralNetConfiguration config = NeuralNetConfiguration.builder().seed(12345).list() .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() .reconstructionDistribution(new GaussianReconstructionDistribution(Activation.IDENTITY)) 
.nIn(3).nOut(4).encoderLayerSizes(5).decoderLayerSizes(6).build()) @@ -299,8 +299,8 @@ public class TestVAE extends BaseDL4JTest { String asJson = config.toJson(); String asYaml = config.toYaml(); - MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(asJson); - MultiLayerConfiguration fromYaml = MultiLayerConfiguration.fromYaml(asYaml); + NeuralNetConfiguration fromJson = NeuralNetConfiguration.fromJson(asJson); + NeuralNetConfiguration fromYaml = NeuralNetConfiguration.fromYaml(asYaml); assertEquals(config, fromJson); assertEquals(config, fromYaml); @@ -350,7 +350,7 @@ public class TestVAE extends BaseDL4JTest { throw new RuntimeException(); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l2(0.2).l1(0.3) .updater(new Sgd(1.0)) .seed(12345L).dist(new NormalDistribution(0, 1)) .list().layer(0, @@ -416,7 +416,7 @@ public class TestVAE extends BaseDL4JTest { for (int i = 0; i < reconstructionDistributions.length; i++) { INDArray data = Nd4j.rand(minibatch, inOutSize).muli(2).subi(1); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(0.2).l1(0.3) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l2(0.2).l1(0.3) .updater(new Sgd(1.0)) .seed(12345L).dist(new NormalDistribution(0, 1)) .list().layer(0, @@ -456,7 +456,7 @@ public class TestVAE extends BaseDL4JTest { for(boolean ws : new boolean[]{false, true}) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345L) .trainingWorkspaceMode(ws ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .inferenceWorkspaceMode(ws ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java index 175292211..0b8b1877d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.misc; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.api.Updater; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -40,15 +39,15 @@ import static org.junit.jupiter.api.Assertions.assertTrue; public class CloseNetworkTests extends BaseDL4JTest { public static MultiLayerNetwork getTestNet() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1e-3)) - .list() + .layer(new ConvolutionLayer.Builder().nOut(5).kernelSize(3, 3).activation(Activation.TANH).build()) .layer(new BatchNormalization.Builder().nOut(5).build()) .layer(new SubsamplingLayer.Builder().build()) .layer(new DenseLayer.Builder().nOut(10).activation(Activation.RELU).build()) .layer(new OutputLayer.Builder().nOut(10).build()) - .setInputType(InputType.convolutional(28, 28, 1)) + .inputType(InputType.convolutional(28, 28, 1)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java index 44d1a2098..09dfb45ea 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.misc; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.EmbeddingLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -31,7 +30,6 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; @@ -50,8 +48,8 @@ public class LargeNetTest extends BaseDL4JTest { //More than 2.1 billion parameters //10M classes plus 300 vector size -> 3 billion elements - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .layer(new EmbeddingLayer.Builder().nIn(10_000_000).nOut(300).build()) .layer(new OutputLayer.Builder().nIn(300).nOut(10).activation(Activation.SOFTMAX).build()) .build(); @@ -82,7 +80,7 @@ public class LargeNetTest extends BaseDL4JTest { //More than 2.1 billion parameters //10M classes plus 300 vector size -> 3 billion elements - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("0", new EmbeddingLayer.Builder().nIn(10_000_000).nOut(300).build(), "in") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java index 77f3a2342..f6ddd312c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java @@ -22,11 +22,9 @@ package org.deeplearning4j.nn.misc; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; -import org.deeplearning4j.nn.conf.weightnoise.DropConnect; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.jupiter.api.Test; @@ -49,10 +47,10 @@ public class TestLrChanges extends BaseDL4JTest { @Test public void testChangeLrMLN(){ //First: Set LR for a *single* layer and compare vs. 
equivalent net config - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) - .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new Adam(0.1)).build()) .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new RmsProp(0.01)).build()) .layer(new OutputLayer.Builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build()) @@ -66,10 +64,10 @@ public class TestLrChanges extends BaseDL4JTest { } - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) - .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new Adam(0.5)).build()) //0.5 LR .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new RmsProp(0.01)).build()) .layer(new OutputLayer.Builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build()) @@ -116,10 +114,10 @@ public class TestLrChanges extends BaseDL4JTest { //Now: Set *all* LRs to say 0.3... - MultiLayerConfiguration conf3 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf3 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) - .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new Adam(0.3)).build()) //0.5 LR .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new RmsProp(0.3)).build()) .layer(new OutputLayer.Builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build()) @@ -148,11 +146,11 @@ public class TestLrChanges extends BaseDL4JTest { @Test public void testChangeLSGD() { //Simple test for no updater nets - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .updater(new Sgd(0.1)) - .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(new OutputLayer.Builder().nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) @@ -177,11 +175,11 @@ public class TestLrChanges extends BaseDL4JTest { @Test public void testChangeLrMLNSchedule(){ //First: Set LR for a *single* layer and compare vs. 
equivalent net config - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .updater(new Adam(0.1)) - .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) @@ -195,11 +193,11 @@ public class TestLrChanges extends BaseDL4JTest { } - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .updater(new Adam(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8 ))) - .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) @@ -239,7 +237,7 @@ public class TestLrChanges extends BaseDL4JTest { @Test public void testChangeLrCompGraph(){ //First: Set LR for a *single* layer and compare vs. equivalent net config - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .graphBuilder() @@ -258,7 +256,7 @@ public class TestLrChanges extends BaseDL4JTest { } - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .graphBuilder() @@ -310,7 +308,7 @@ public class TestLrChanges extends BaseDL4JTest { //Now: Set *all* LRs to say 0.3... - MultiLayerConfiguration conf3 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf3 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .list() @@ -342,7 +340,7 @@ public class TestLrChanges extends BaseDL4JTest { @Test public void testChangeLrCompGraphSchedule(){ //First: Set LR for a *single* layer and compare vs. 
equivalent net config - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .updater(new Adam(0.1)) @@ -362,7 +360,7 @@ public class TestLrChanges extends BaseDL4JTest { } - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .updater(new Adam(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8 ))) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java index a7fcee172..b22bfec2f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java @@ -24,7 +24,6 @@ import org.apache.commons.io.FileUtils; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.*; import org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex; @@ -53,8 +52,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; public class TestMemoryReports extends BaseDL4JTest { - public static List> getTestLayers() { - List> l = new ArrayList<>(); + public static List> getTestLayers() { + List> l = new ArrayList<>(); l.add(new Pair<>(new ActivationLayer.Builder().activation(Activation.TANH).build(), InputType.feedForward(20))); l.add(new Pair<>(new DenseLayer.Builder().nIn(20).nOut(20).build(), InputType.feedForward(20))); l.add(new Pair<>(new DropoutLayer.Builder().nIn(20).nOut(20).build(), InputType.feedForward(20))); @@ -100,12 +99,12 @@ public class TestMemoryReports extends BaseDL4JTest { @Test public void testMemoryReportSimple() { - List> l = getTestLayers(); + List> l = getTestLayers(); - for (Pair p : l) { + for (Pair p : l) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list().layer(0, p.getFirst().clone()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(0, p.getFirst().clone()) .layer(1, p.getFirst().clone()).validateOutputLayerConfig(false).build(); MemoryReport mr = conf.getMemoryReport(p.getSecond()); @@ -128,12 +127,12 @@ public class TestMemoryReports extends BaseDL4JTest { @Test public void testMemoryReportSimpleCG() { - List> l = getTestLayers(); + List> l = getTestLayers(); - for (Pair p : l) { + for (Pair p : l) { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("0", p.getFirst().clone(), "in").addLayer("1", p.getFirst().clone(), "0") .setOutputs("1").validateOutputLayerConfig(false).build(); @@ -168,7 +167,7 @@ public class TestMemoryReports extends BaseDL4JTest { layerInputs = new String[] {"1"}; } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs(inputs) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs(inputs) .allowDisconnected(true) .addVertex("gv", p.getFirst(), 
layerInputs).setOutputs("gv").build(); @@ -216,7 +215,7 @@ public class TestMemoryReports extends BaseDL4JTest { @Test public void validateSimple() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(0, new DenseLayer.Builder().nIn(10).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(27).build()).build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java index fc8312630..fdfb16fcd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.misc; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; @@ -91,17 +90,17 @@ public class TestNetConversion extends BaseDL4JTest { private MultiLayerNetwork getNet1(boolean train) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .convolutionMode(ConvolutionMode.Same) .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) .updater(new Sgd(0.1)) - .list() + .layer(new ConvolutionLayer.Builder().nIn(3).nOut(5).kernelSize(2, 2).stride(1, 1).build()) .layer(new SubsamplingLayer.Builder().kernelSize(2, 2).stride(1, 1).build()) .layer(new DenseLayer.Builder().nOut(32).build()) .layer(new OutputLayer.Builder().nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) - .setInputType(InputType.convolutional(10, 10, 3)) + .inputType(InputType.convolutional(10, 10, 3)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -121,16 +120,16 @@ public class TestNetConversion extends BaseDL4JTest { private MultiLayerNetwork getNet2() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .convolutionMode(ConvolutionMode.Same) .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) .updater(new Sgd(0.1)) - .list() + .layer(new GravesLSTM.Builder().nOut(8).build()) .layer(new LSTM.Builder().nOut(8).build()) .layer(new RnnOutputLayer.Builder().nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) - .setInputType(InputType.recurrent(5)) + .inputType(InputType.recurrent(5)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java index 5b00685af..9649adffd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java @@ -93,24 +93,24 @@ public class WorkspaceTests extends BaseDL4JTest { int depthOut = 2; int nOut = 2; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) - .convolutionMode(ConvolutionMode.Same).seed(12345L).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) + 
.convolutionMode(ConvolutionMode.Same).seed(12345L) .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(2, 2) .stride(1, 1).activation(Activation.TANH).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) - .setInputType(InputType.convolutional(5, 5, 2)) + .inputType(InputType.convolutional(5, 5, 2)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf.clone()); net.init(); - net.getLayerWiseConfigurations().setInferenceWorkspaceMode(WorkspaceMode.ENABLED); - net.getLayerWiseConfigurations().setTrainingWorkspaceMode(WorkspaceMode.ENABLED); + net.getConfiguration().setInferenceWorkspaceMode(WorkspaceMode.ENABLED); + net.getConfiguration().setTrainingWorkspaceMode(WorkspaceMode.ENABLED); MultiLayerNetwork net2 = new MultiLayerNetwork(conf.clone()); net2.init(); - net2.getLayerWiseConfigurations().setInferenceWorkspaceMode(WorkspaceMode.NONE); - net2.getLayerWiseConfigurations().setTrainingWorkspaceMode(WorkspaceMode.NONE); + net2.getConfiguration().setInferenceWorkspaceMode(WorkspaceMode.NONE); + net2.getConfiguration().setTrainingWorkspaceMode(WorkspaceMode.NONE); INDArray in = Nd4j.rand(1, 2, 5, 5); @@ -120,7 +120,7 @@ public class WorkspaceTests extends BaseDL4JTest { public static ComputationGraph createNet() throws Exception { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .addLayer("0", new ConvolutionLayer.Builder().nOut(3) @@ -149,7 +149,7 @@ public class WorkspaceTests extends BaseDL4JTest { for (WorkspaceMode wm : WorkspaceMode.values()) { System.out.println(wm); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .trainingWorkspaceMode(wm) .inferenceWorkspaceMode(wm) .graphBuilder() @@ -184,15 +184,15 @@ public class WorkspaceTests extends BaseDL4JTest { public void testWithPreprocessorsMLN() { for (WorkspaceMode wm : WorkspaceMode.values()) { System.out.println(wm); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .trainingWorkspaceMode(wm) .inferenceWorkspaceMode(wm) - .list() + .layer(new GravesLSTM.Builder().nIn(10).nOut(5).build()) .layer(new GravesLSTM.Builder().nIn(5).nOut(8).build()) .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(3).build()) .inputPreProcessor(0, new DupPreProcessor()) - .setInputType(InputType.recurrent(10)) + .inputType(InputType.recurrent(10)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -247,14 +247,14 @@ public class WorkspaceTests extends BaseDL4JTest { System.out.println("Starting test: " + ws + " - " + i); - NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.NeuralNetConfigurationBuilder b = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) .activation(Activation.TANH) .inferenceWorkspaceMode(ws) .trainingWorkspaceMode(ws) .list(); - ComputationGraphConfiguration.GraphBuilder gb = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration.GraphBuilder gb = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) .activation(Activation.TANH) .inferenceWorkspaceMode(ws) @@ -292,7 +292,7 @@ public class WorkspaceTests extends BaseDL4JTest { gb.addLayer("out", new 
RnnOutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1"); gb.setOutputs("out"); - MultiLayerConfiguration conf = b.build(); + NeuralNetConfiguration conf = b.build(); ComputationGraphConfiguration conf2 = gb.build(); @@ -320,14 +320,14 @@ public class WorkspaceTests extends BaseDL4JTest { System.out.println("Starting test: " + ws + " - " + i); - NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.NeuralNetConfigurationBuilder b = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) .activation(Activation.TANH) .inferenceWorkspaceMode(ws) .trainingWorkspaceMode(ws) .list(); - ComputationGraphConfiguration.GraphBuilder gb = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration.GraphBuilder gb = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) .activation(Activation.TANH) .inferenceWorkspaceMode(ws) @@ -366,14 +366,14 @@ public class WorkspaceTests extends BaseDL4JTest { .nIn(10).nOut(10).build(), "1"); gb.setOutputs("out"); - MultiLayerConfiguration conf = b + NeuralNetConfiguration conf = b .backpropType(BackpropType.TruncatedBPTT) - .tBPTTLength(5) + .tbpttBackLength(5).tbpttFwdLength(5) .build(); ComputationGraphConfiguration conf2 = gb .backpropType(BackpropType.TruncatedBPTT) - .tBPTTForwardLength(5).tBPTTBackwardLength(5) + .tbpttFwdLength(5).tbpttBackLength(5) .build(); @@ -400,7 +400,7 @@ public class WorkspaceTests extends BaseDL4JTest { log.info("WorkspaceMode = " + ws); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) .seed(12345) .trainingWorkspaceMode(ws).inferenceWorkspaceMode(ws) @@ -429,7 +429,7 @@ public class WorkspaceTests extends BaseDL4JTest { public void testWorkspaceSetting() { for (WorkspaceMode wsm : WorkspaceMode.values()) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) .seed(12345) .trainingWorkspaceMode(wsm).inferenceWorkspaceMode(wsm) @@ -441,7 +441,7 @@ public class WorkspaceTests extends BaseDL4JTest { assertEquals(wsm, conf.getInferenceWorkspaceMode()); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) .seed(12345) .trainingWorkspaceMode(wsm).inferenceWorkspaceMode(wsm) @@ -458,7 +458,7 @@ public class WorkspaceTests extends BaseDL4JTest { @Test public void testClearing() { for(WorkspaceMode wsm : WorkspaceMode.values()) { - ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .updater(new Adam()) .inferenceWorkspaceMode(wsm) .trainingWorkspaceMode(wsm) @@ -501,7 +501,7 @@ public class WorkspaceTests extends BaseDL4JTest { MemoryWorkspace workspace = Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(conf, wsName); - MultiLayerConfiguration netConf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration netConf = NeuralNetConfiguration.builder() .seed(12345) .weightInit(WeightInit.XAVIER) .list() @@ -556,7 +556,7 @@ public class WorkspaceTests extends BaseDL4JTest { final INDArray input = Nd4j.rand(1, 30); - final ComputationGraphConfiguration computationGraphConfiguration = new NeuralNetConfiguration.Builder() + final ComputationGraphConfiguration 
computationGraphConfiguration = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("state") .addLayer("value_output", new OutputLayer.Builder().nIn(30).nOut(1).activation(Activation.IDENTITY) @@ -578,7 +578,7 @@ public class WorkspaceTests extends BaseDL4JTest { INDArray input = Nd4j.rand(1, 30); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new OutputLayer.Builder().nIn(30).nOut(1).activation(Activation.IDENTITY).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); @@ -607,13 +607,13 @@ public class WorkspaceTests extends BaseDL4JTest { - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(2).kernelSize(2, 2) .stride(1, 1).activation(Activation.TANH).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(10).build()) - .setInputType(InputType.convolutional(5, 5, 1)) + .inputType(InputType.convolutional(5, 5, 1)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(mlc); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java index 695fdb70d..ca9c0f67c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java @@ -25,7 +25,6 @@ import org.deeplearning4j.LayerHelperValidationUtil; import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.iterator.impl.SingletonDataSetIterator; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -74,7 +73,7 @@ public class ValidateMKLDNN extends BaseDL4JTest { INDArray f = Nd4j.rand(DataType.FLOAT, inputSize); INDArray l = TestUtils.randomOneHot(minibatch, 10).castTo(DataType.FLOAT); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(0.01)) .convolutionMode(cm) .seed(12345) @@ -98,7 +97,7 @@ public class ValidateMKLDNN extends BaseDL4JTest { .nOut(3) .build()) .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1])) + .inputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1])) .build(); MultiLayerNetwork netWith = new MultiLayerNetwork(conf.clone()); @@ -149,7 +148,7 @@ public class ValidateMKLDNN extends BaseDL4JTest { INDArray f = Nd4j.rand(Nd4j.defaultFloatingPointType(), inputSize); INDArray l = TestUtils.randomOneHot(minibatch, 10); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .updater(new Adam(0.01)) .convolutionMode(cm) @@ -169,7 +168,7 @@ public class ValidateMKLDNN extends BaseDL4JTest { .nOut(3) .build()) .layer(new 
OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1])) + .inputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1])) .build(); MultiLayerNetwork netWith = new MultiLayerNetwork(conf.clone()); @@ -223,7 +222,7 @@ public class ValidateMKLDNN extends BaseDL4JTest { INDArray f = Nd4j.rand(Nd4j.defaultFloatingPointType(), inputSize); INDArray l = TestUtils.randomOneHot(minibatch, 10).castTo(DataType.FLOAT); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(0.01)) .convolutionMode(cm) .weightInit(new NormalDistribution(0,1)) @@ -242,7 +241,7 @@ public class ValidateMKLDNN extends BaseDL4JTest { .k(k[i]) .cudnnAllowFallback(false).build()) .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1])) + .inputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1])) .build(); MultiLayerNetwork netWith = new MultiLayerNetwork(conf.clone()); @@ -292,7 +291,7 @@ public class ValidateMKLDNN extends BaseDL4JTest { INDArray dLdb = beta.ulike(); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .inferenceWorkspaceMode(WorkspaceMode.NONE) .trainingWorkspaceMode(WorkspaceMode.NONE) .list() diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java index 94f26b712..27efa9149 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.multilayer; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -33,11 +32,9 @@ import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.listeners.ScoreIterationListener; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.iter.NdIndexIterator; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.transforms.strict.SigmoidDerivative; -import org.nd4j.linalg.api.ops.impl.transforms.strict.TanhDerivative; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.exception.ND4JArraySizeException; @@ -69,7 +66,7 @@ public class BackPropMLPTest extends BaseDL4JTest { @Test public void testMLP() { //Simple mini-batch test with multiple hidden layers - MultiLayerConfiguration conf = getIrisMLPSimpleConfig(new int[] {5, 4, 3}, Activation.SIGMOID); + NeuralNetConfiguration conf = getIrisMLPSimpleConfig(new int[] {5, 4, 3}, Activation.SIGMOID); // System.out.println(conf); MultiLayerNetwork network = new 
MultiLayerNetwork(conf); network.init(); @@ -83,7 +80,7 @@ public class BackPropMLPTest extends BaseDL4JTest { @Test public void testMLP2() { //Simple mini-batch test with multiple hidden layers - MultiLayerConfiguration conf = getIrisMLPSimpleConfig(new int[] {5, 15, 3}, Activation.TANH); + NeuralNetConfiguration conf = getIrisMLPSimpleConfig(new int[] {5, 15, 3}, Activation.TANH); // System.out.println(conf); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); @@ -322,9 +319,9 @@ public class BackPropMLPTest extends BaseDL4JTest { * Learning Rate = 0.1 * No regularization, no Adagrad, no momentum etc. One iteration. */ - private static MultiLayerConfiguration getIrisMLPSimpleConfig(int[] hiddenLayerSizes, + private static NeuralNetConfiguration getIrisMLPSimpleConfig(int[] hiddenLayerSizes, Activation activationFunction) { - NeuralNetConfiguration.ListBuilder lb = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder lb = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .seed(12345L).list(); for (int i = 0; i < hiddenLayerSizes.length; i++) { diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java index 49d70647c..cad0cfd50 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java @@ -20,9 +20,31 @@ package org.deeplearning4j.nn.multilayer; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.iterator.ExistingDataSetIterator; @@ -31,12 +53,29 @@ import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.SingletonMultiDataSetIterator; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.exception.DL4JException; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.BackpropType; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.GradientNormalization; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.Updater; +import org.deeplearning4j.nn.conf.WorkspaceMode; import 
org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.ActivationLayer; +import org.deeplearning4j.nn.conf.layers.AutoEncoder; +import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BatchNormalization; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; +import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer; +import org.deeplearning4j.nn.conf.layers.GravesLSTM; +import org.deeplearning4j.nn.conf.layers.LSTM; +import org.deeplearning4j.nn.conf.layers.LossLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.conf.layers.RnnOutputLayer; +import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; import org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer; import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; @@ -58,6 +97,7 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -78,356 +118,349 @@ import org.nd4j.linalg.learning.config.Adam; import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.common.primitives.Pair; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.*; - -import static org.junit.jupiter.api.Assertions.*; @Slf4j public class MultiLayerTest extends BaseDL4JTest { - private static OpExecutioner.ProfilingMode origMode; + private static OpExecutioner.ProfilingMode origMode; - @BeforeAll - public static void beforeClass(){ - origMode = Nd4j.getExecutioner().getProfilingMode(); + @BeforeAll + public static void beforeClass() { + origMode = Nd4j.getExecutioner().getProfilingMode(); + } + + @AfterAll + public static void afterClass() { + Nd4j.getExecutioner().setProfilingMode(origMode); + } + + private static NeuralNetConfiguration getConf() { + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(12345L) + .list().layer(0, + new DenseLayer.Builder().nIn(4).nOut(3) + + .dist(new NormalDistribution(0, 1)) + .build()) + .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(3).nOut(3) + + .dist(new NormalDistribution(0, 1)).build()) + .build(); + return conf; + } + + public static float[] asFloat(INDArray arr) { + long len = arr.length(); + + float[] f = new float[(int) len]; + for (int i = 0; i < len; i++) { + f[i] = arr.getFloat(i); + } + return f; + } + + @BeforeEach + public void before() { + Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.SCOPE_PANIC); + } + + @Override + public DataType getDataType() { + return DataType.FLOAT; + } + + @Test + public void testSetParams() { + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .list().layer(0, + new DenseLayer.Builder().nIn(4).nOut(3) + .activation(Activation.TANH).build()) + .layer(1, new 
DenseLayer.Builder().nIn(3).nOut(2).build()) + .build(); + + MultiLayerNetwork network3 = new MultiLayerNetwork(conf); + network3.init(); + + INDArray params = network3.params(); + INDArray weights = network3.getLayer(0).getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); + INDArray bias = network3.getLayer(0).getParam(DefaultParamInitializer.BIAS_KEY).dup(); + network3.setParameters(params); + assertEquals(weights, network3.getLayer(0).getParam(DefaultParamInitializer.WEIGHT_KEY)); + assertEquals(bias, network3.getLayer(0).getParam(DefaultParamInitializer.BIAS_KEY)); + INDArray params4 = network3.params(); + assertEquals(params, params4); + } + + @Test + public void testBatchNorm() { + Nd4j.getRandom().setSeed(123); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(123).list() + .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(2, new BatchNormalization.Builder().nOut(2).build()) + .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX).nIn(2).nOut(3).build()) + .build(); + + MultiLayerNetwork network = new MultiLayerNetwork(conf); + network.init(); + network.setListeners(new ScoreIterationListener(1)); + + DataSetIterator iter = new IrisDataSetIterator(150, 150); + + DataSet next = iter.next(); + next.normalizeZeroMeanZeroUnitVariance(); + SplitTestAndTrain trainTest = next.splitTestAndTrain(110); + network.setLabels(trainTest.getTrain().getLabels()); + network.init(); + for (int i = 0; i < 5; i++) { + network.fit(trainTest.getTrain()); } - @BeforeEach - public void before(){ - Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.SCOPE_PANIC); + } + + @Test + public void testBackProp() { + Nd4j.getRandom().setSeed(123); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(123).list() + .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(2, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX).nIn(2).nOut(3).build()) + .build(); + + MultiLayerNetwork network = new MultiLayerNetwork(conf); + network.init(); + network.setListeners(new ScoreIterationListener(1)); + + DataSetIterator iter = new IrisDataSetIterator(150, 150); + + DataSet next = iter.next(); + next.normalizeZeroMeanZeroUnitVariance(); + SplitTestAndTrain trainTest = next.splitTestAndTrain(110); + network.setInput(trainTest.getTrain().getFeatures()); + network.setLabels(trainTest.getTrain().getLabels()); + network.init(); + for (int i = 0; i < 5; i++) { + network.fit(trainTest.getTrain()); } - @AfterAll - public static void afterClass(){ - Nd4j.getExecutioner().setProfilingMode(origMode); + DataSet test = trainTest.getTest(); + Evaluation eval = new Evaluation(); + INDArray output = network.output(test.getFeatures()); + eval.eval(test.getLabels(), output); + log.info("Score " + eval.stats()); + } + + @Test + public void 
testGradientWithAsList() { + MultiLayerNetwork net1 = new MultiLayerNetwork(getConf()); + MultiLayerNetwork net2 = new MultiLayerNetwork(getConf()); + net1.init(); + net2.init(); + + DataSet x1 = new IrisDataSetIterator(1, 150).next(); + DataSet all = new IrisDataSetIterator(150, 150).next(); + DataSet x2 = all.asList().get(0); + + //x1 and x2 contain identical data + assertArrayEquals(asFloat(x1.getFeatures()), asFloat(x2.getFeatures()), 0.0f); + assertArrayEquals(asFloat(x1.getLabels()), asFloat(x2.getLabels()), 0.0f); + assertEquals(x1, x2); + + //Set inputs/outputs so gradient can be calculated: + net1.feedForward(x1.getFeatures()); + net2.feedForward(x2.getFeatures()); + ((BaseOutputLayer) net1.getLayer(1)).setLabels(x1.getLabels()); + ((BaseOutputLayer) net2.getLayer(1)).setLabels(x2.getLabels()); + + net1.gradient(); + net2.gradient(); + } + + /** + * This test intended only to test activateSelectedLayers method, it does not involves + * fully-working AutoEncoder. + */ + @Test + public void testSelectedActivations() { + // Train DeepAutoEncoder on very limited trainset + final int numRows = 28; + final int numColumns = 28; + int seed = 123; + int numSamples = 3; + int iterations = 1; + int listenerFreq = iterations / 5; + + log.info("Load data...."); + + float[][] trainingData = new float[numSamples][numColumns * numRows]; + Arrays.fill(trainingData[0], 0.95f); + Arrays.fill(trainingData[1], 0.5f); + Arrays.fill(trainingData[2], 0.05f); + + log.info("Build model...."); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(seed) + .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() + .layer(0, new DenseLayer.Builder().nIn(numRows * numColumns).nOut(1000).build()) + .layer(1, new DenseLayer.Builder().nIn(1000).nOut(500).build()) + .layer(2, new DenseLayer.Builder().nIn(500).nOut(250).build()) + .layer(3, new DenseLayer.Builder().nIn(250).nOut(100).build()) + .layer(4, new DenseLayer.Builder().nIn(100).nOut(30).build()) //encoding stops + .layer(5, new DenseLayer.Builder().nIn(30).nOut(100).build()) //decoding starts + .layer(6, new DenseLayer.Builder().nIn(100).nOut(250).build()) + .layer(7, new DenseLayer.Builder().nIn(250).nOut(500).build()) + .layer(8, new DenseLayer.Builder().nIn(500).nOut(1000).build()) + .layer(9, + new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nIn(1000) + .nOut(numRows * numColumns).activation(Activation.SOFTMAX).build()) + .build(); + + MultiLayerNetwork model = new MultiLayerNetwork(conf); + model.init(); + + model.addListeners(new ScoreIterationListener(listenerFreq)); + + log.info("Train model...."); + int cnt = 0; + while (cnt < numSamples) { + INDArray input = Nd4j.create(trainingData[cnt]).reshape(1, -1); + model.fit(new DataSet(input, input)); + cnt++; } + // Make two separate selective calls - @Override - public DataType getDataType(){ - return DataType.FLOAT; + log.info("Testing full cycle..."); + + List comparableResult = model.feedForward( + Nd4j.create(trainingData[0], 1, trainingData[0].length)); + + INDArray encodeResult = model.activateSelectedLayers(0, 4, + Nd4j.create(trainingData[0], 1, trainingData[0].length)); + + log.info("Compare feedForward results with selectedActivation"); + + assertEquals(comparableResult.get(5), encodeResult); + + INDArray decodeResults = model.activateSelectedLayers(5, 9, encodeResult); + + log.info("Decode results: " + decodeResults.columns() + " " + decodeResults); + log.info( + "Comparable results: " + comparableResult.get(10).columns() + " " + 
comparableResult.get( + 10)); + + assertEquals(comparableResult.get(10), decodeResults); + } + + @Test + public void testFeedForwardToLayer() { + + int nIn = 30; + int nOut = 25; + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) + .updater(new Sgd(1e-3)) + .list().layer( + 0, new DenseLayer.Builder().nIn(nIn).nOut(600) + + .dist(new NormalDistribution(0, 1e-5)) + .build()) + .layer(1, new DenseLayer.Builder() + .nIn(600).nOut(250) + .dist(new NormalDistribution(0, 1e-5)) + .build()) + .layer(2, new DenseLayer.Builder() + .nIn(250).nOut(100) + .dist(new NormalDistribution(0, 1e-5)) + .build()) + .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + LossFunctions.LossFunction.MCXENT).nIn(100).nOut(25) + .activation(Activation.SOFTMAX) + .weightInit(new NormalDistribution(0, 1e-5)).build()) + .build(); + + MultiLayerNetwork network = new MultiLayerNetwork(conf); + network.init(); + + INDArray input = Nd4j.rand(5, nIn); + + List activations = network.feedForward(input); + assertEquals(5, activations.size()); //4 layers + input + + List activationsAll = network.feedForwardToLayer(3, input); + assertEquals(activations, activationsAll); + + for (int i = 3; i >= 0; i--) { + List activationsPartial = network.feedForwardToLayer(i, input); + assertEquals(i + 2, + activationsPartial.size()); //i+2: for layer 3: input + activations of {0,1,2,3} -> 5 total = 3+2 + for (int j = 0; j <= i; j++) { + INDArray exp = activationsAll.get(j); + INDArray act = activationsPartial.get(j); + assertEquals(exp, act); + } } + } - @Test - public void testSetParams() { - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() - .list().layer(0, - new DenseLayer.Builder().nIn(4).nOut(3) - .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).build()) - .build(); - MultiLayerNetwork network3 = new MultiLayerNetwork(conf); - network3.init(); + @Test + public void testBackpropGradient() { + //Testing: MultiLayerNetwork.backpropGradient() + //i.e., specifically without an output layer - INDArray params = network3.params(); - INDArray weights = network3.getLayer(0).getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); - INDArray bias = network3.getLayer(0).getParam(DefaultParamInitializer.BIAS_KEY).dup(); - network3.setParameters(params); - assertEquals(weights, network3.getLayer(0).getParam(DefaultParamInitializer.WEIGHT_KEY)); - assertEquals(bias, network3.getLayer(0).getParam(DefaultParamInitializer.BIAS_KEY)); - INDArray params4 = network3.params(); - assertEquals(params, params4); + int nIn = 10; + int nOut = 40; + int miniBatch = 5; + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .updater(new Sgd(0.1)).list() + .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).activation(Activation.RELU) + .weightInit(WeightInit.XAVIER).build()) + .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).activation(Activation.RELU) + .weightInit(WeightInit.XAVIER).build()) + .layer(2, new DenseLayer.Builder().nIn(30).nOut(nOut).activation(Activation.RELU) + .weightInit(WeightInit.XAVIER).build()) + .build(); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + Nd4j.getRandom().setSeed(12345); + INDArray eps = Nd4j.rand(miniBatch, nOut); + INDArray input = Nd4j.rand(miniBatch, nIn); + + net.setInput(input); + net.feedForward(true, false); //Need to feed forward before backprop + + Pair pair = net.backpropGradient(eps, LayerWorkspaceMgr.noWorkspaces()); + 
INDArray epsOut = pair.getSecond(); + assertNotNull(epsOut); + assertArrayEquals(new long[]{miniBatch, nIn}, epsOut.shape()); + + Gradient g = pair.getFirst(); + Map gradMap = g.gradientForVariable(); + assertEquals(6, gradMap.size()); //3 layers, weight + bias gradients for each + + String[] expKeys = {"0_" + DefaultParamInitializer.WEIGHT_KEY, + "0_" + DefaultParamInitializer.BIAS_KEY, + "1_" + DefaultParamInitializer.WEIGHT_KEY, "2_" + DefaultParamInitializer.BIAS_KEY, + "2_" + DefaultParamInitializer.WEIGHT_KEY, "2_" + DefaultParamInitializer.BIAS_KEY}; + Set keys = gradMap.keySet(); + for (String s : expKeys) { + assertTrue(keys.contains(s)); } - @Test - public void testBatchNorm() { - Nd4j.getRandom().setSeed(123); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(123).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(2, new BatchNormalization.Builder().nOut(2).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).nIn(2).nOut(3).build()) - .build(); - - - MultiLayerNetwork network = new MultiLayerNetwork(conf); - network.init(); - network.setListeners(new ScoreIterationListener(1)); - - DataSetIterator iter = new IrisDataSetIterator(150, 150); - - DataSet next = iter.next(); - next.normalizeZeroMeanZeroUnitVariance(); - SplitTestAndTrain trainTest = next.splitTestAndTrain(110); - network.setLabels(trainTest.getTrain().getLabels()); - network.init(); - for( int i=0; i<5; i++ ) { - network.fit(trainTest.getTrain()); - } - - } - - @Test - public void testBackProp() { - Nd4j.getRandom().setSeed(123); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(123).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).nIn(2).nOut(3).build()) - .build(); - - - MultiLayerNetwork network = new MultiLayerNetwork(conf); - network.init(); - network.setListeners(new ScoreIterationListener(1)); - - DataSetIterator iter = new IrisDataSetIterator(150, 150); - - DataSet next = iter.next(); - next.normalizeZeroMeanZeroUnitVariance(); - SplitTestAndTrain trainTest = next.splitTestAndTrain(110); - network.setInput(trainTest.getTrain().getFeatures()); - network.setLabels(trainTest.getTrain().getLabels()); - network.init(); - for( int i=0; i<5; i++ ) { - network.fit(trainTest.getTrain()); - } - - DataSet test = trainTest.getTest(); - Evaluation eval = new Evaluation(); - INDArray output = network.output(test.getFeatures()); - eval.eval(test.getLabels(), output); - log.info("Score " + eval.stats()); - } - - - - @Test - public void testGradientWithAsList() { - MultiLayerNetwork net1 = new MultiLayerNetwork(getConf()); - MultiLayerNetwork net2 = new MultiLayerNetwork(getConf()); - net1.init(); - net2.init(); - - DataSet x1 = new 
IrisDataSetIterator(1, 150).next(); - DataSet all = new IrisDataSetIterator(150, 150).next(); - DataSet x2 = all.asList().get(0); - - //x1 and x2 contain identical data - assertArrayEquals(asFloat(x1.getFeatures()), asFloat(x2.getFeatures()), 0.0f); - assertArrayEquals(asFloat(x1.getLabels()), asFloat(x2.getLabels()), 0.0f); - assertEquals(x1, x2); - - //Set inputs/outputs so gradient can be calculated: - net1.feedForward(x1.getFeatures()); - net2.feedForward(x2.getFeatures()); - ((BaseOutputLayer) net1.getLayer(1)).setLabels(x1.getLabels()); - ((BaseOutputLayer) net2.getLayer(1)).setLabels(x2.getLabels()); - - net1.gradient(); - net2.gradient(); - } - - /** - * This test intended only to test activateSelectedLayers method, it does not involves fully-working AutoEncoder. - */ - @Test - public void testSelectedActivations() { - // Train DeepAutoEncoder on very limited trainset - final int numRows = 28; - final int numColumns = 28; - int seed = 123; - int numSamples = 3; - int iterations = 1; - int listenerFreq = iterations / 5; - - log.info("Load data...."); - - float[][] trainingData = new float[numSamples][numColumns * numRows]; - Arrays.fill(trainingData[0], 0.95f); - Arrays.fill(trainingData[1], 0.5f); - Arrays.fill(trainingData[2], 0.05f); - - - - log.info("Build model...."); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed) - .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() - .layer(0, new DenseLayer.Builder().nIn(numRows * numColumns).nOut(1000).build()) - .layer(1, new DenseLayer.Builder().nIn(1000).nOut(500).build()) - .layer(2, new DenseLayer.Builder().nIn(500).nOut(250).build()) - .layer(3, new DenseLayer.Builder().nIn(250).nOut(100).build()) - .layer(4, new DenseLayer.Builder().nIn(100).nOut(30).build()) //encoding stops - .layer(5, new DenseLayer.Builder().nIn(30).nOut(100).build()) //decoding starts - .layer(6, new DenseLayer.Builder().nIn(100).nOut(250).build()) - .layer(7, new DenseLayer.Builder().nIn(250).nOut(500).build()) - .layer(8, new DenseLayer.Builder().nIn(500).nOut(1000).build()) - .layer(9, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nIn(1000) - .nOut(numRows * numColumns).activation(Activation.SOFTMAX).build()) - .build(); - - MultiLayerNetwork model = new MultiLayerNetwork(conf); - model.init(); - - model.addListeners(new ScoreIterationListener(listenerFreq)); - - log.info("Train model...."); - int cnt = 0; - while (cnt < numSamples) { - INDArray input = Nd4j.create(trainingData[cnt]).reshape(1, -1); - model.fit(new DataSet(input, input)); - cnt++; - } - // Make two separate selective calls - - log.info("Testing full cycle..."); - - List comparableResult = model.feedForward(Nd4j.create(trainingData[0], 1, trainingData[0].length)); - - INDArray encodeResult = model.activateSelectedLayers(0, 4, Nd4j.create(trainingData[0], 1, trainingData[0].length)); - - log.info("Compare feedForward results with selectedActivation"); - - assertEquals(comparableResult.get(5), encodeResult); - - INDArray decodeResults = model.activateSelectedLayers(5, 9, encodeResult); - - - log.info("Decode results: " + decodeResults.columns() + " " + decodeResults); - log.info("Comparable results: " + comparableResult.get(10).columns() + " " + comparableResult.get(10)); - - assertEquals(comparableResult.get(10), decodeResults); - } - - private static MultiLayerConfiguration getConf() { - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345L) - .list().layer(0, - new 
DenseLayer.Builder().nIn(4).nOut(3) - - .dist(new NormalDistribution(0,1)) - .build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(3).nOut(3) - - .dist(new NormalDistribution(0, 1)).build()) - .build(); - return conf; - } - - public static float[] asFloat(INDArray arr) { - long len = arr.length(); - - float[] f = new float[(int) len]; - for (int i = 0; i < len; i++) - f[i] = arr.getFloat(i); - return f; - } - - @Test - public void testFeedForwardToLayer() { - - int nIn = 30; - int nOut = 25; - - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) - .updater(new Sgd(1e-3)) - .list().layer( - 0, new DenseLayer.Builder().nIn(nIn).nOut(600) - - .dist(new NormalDistribution(0,1e-5)) - .build()) - .layer(1, new DenseLayer.Builder() - .nIn(600).nOut(250) - .dist(new NormalDistribution(0, 1e-5)) - .build()) - .layer(2, new DenseLayer.Builder() - .nIn(250).nOut(100) - .dist(new NormalDistribution(0, 1e-5)) - .build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).nIn(100).nOut(25) - .activation(Activation.SOFTMAX) - .weightInit(new NormalDistribution(0, 1e-5)).build()) - .build(); - - MultiLayerNetwork network = new MultiLayerNetwork(conf); - network.init(); - - - INDArray input = Nd4j.rand(5, nIn); - - List activations = network.feedForward(input); - assertEquals(5, activations.size()); //4 layers + input - - List activationsAll = network.feedForwardToLayer(3, input); - assertEquals(activations, activationsAll); - - for (int i = 3; i >= 0; i--) { - List activationsPartial = network.feedForwardToLayer(i, input); - assertEquals(i + 2, activationsPartial.size()); //i+2: for layer 3: input + activations of {0,1,2,3} -> 5 total = 3+2 - for (int j = 0; j <= i; j++) { - INDArray exp = activationsAll.get(j); - INDArray act = activationsPartial.get(j); - assertEquals(exp, act); - } - } - } - - - @Test - public void testBackpropGradient() { - //Testing: MultiLayerNetwork.backpropGradient() - //i.e., specifically without an output layer - - int nIn = 10; - int nOut = 40; - int miniBatch = 5; - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(new Sgd(0.1)).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(2, new DenseLayer.Builder().nIn(30).nOut(nOut).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - Nd4j.getRandom().setSeed(12345); - INDArray eps = Nd4j.rand(miniBatch, nOut); - INDArray input = Nd4j.rand(miniBatch, nIn); - - net.setInput(input); - net.feedForward(true, false); //Need to feed forward before backprop - - Pair pair = net.backpropGradient(eps, LayerWorkspaceMgr.noWorkspaces()); - INDArray epsOut = pair.getSecond(); - assertNotNull(epsOut); - assertArrayEquals(new long[] {miniBatch, nIn}, epsOut.shape()); - - Gradient g = pair.getFirst(); - Map gradMap = g.gradientForVariable(); - assertEquals(6, gradMap.size()); //3 layers, weight + bias gradients for each - - String[] expKeys = {"0_" + DefaultParamInitializer.WEIGHT_KEY, "0_" + DefaultParamInitializer.BIAS_KEY, - "1_" + DefaultParamInitializer.WEIGHT_KEY, "2_" + 
DefaultParamInitializer.BIAS_KEY, - "2_" + DefaultParamInitializer.WEIGHT_KEY, "2_" + DefaultParamInitializer.BIAS_KEY}; - Set keys = gradMap.keySet(); - for (String s : expKeys) { - assertTrue(keys.contains(s)); - } - /* System.out.println(pair); @@ -443,1092 +476,1114 @@ public class MultiLayerTest extends BaseDL4JTest { net.setParams(params); //params() may not be in-place System.out.println(Arrays.toString(params.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 10)).dup().data().asFloat())); */ + } + + @Test + public void testLayerNames() { + int nIn = 10; + int nOut = 40; + + List layerNameList = new ArrayList<>(); + layerNameList.add("dnn1"); + layerNameList.add("dnn2"); + layerNameList.add("dnn3"); + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .updater(new Sgd(0.1)).list() + .layer(0, + new DenseLayer.Builder().name("dnn1").nIn(nIn).nOut(20).activation(Activation.RELU) + .weightInit(WeightInit.XAVIER).build()) + .layer(1, new DenseLayer.Builder().name("dnn2").nIn(20).nOut(30).activation(Activation.RELU) + .weightInit(WeightInit.XAVIER).build()) + .layer(2, new DenseLayer.Builder().name("dnn3").nIn(30).nOut(nOut) + .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER).build()) + .build(); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + assertEquals(layerNameList.get(0), net.getLayer(0).getLayerConfiguration().getLayerName()); + assertEquals(layerNameList, net.getLayerNames()); + BaseLayer b = (BaseLayer) net.getLayer(layerNameList.get(2)).getLayerConfiguration(); + assertEquals("softmax", b.getActivationFn().toString()); + } + + + @Test + public void testScoreExamples() { + Nd4j.getRandom().setSeed(12345); + int nIn = 5; + int nOut = 6; + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) + .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER) + .list() + .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) + .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()) + .layer(2, new OutputLayer.Builder() + .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) + .build(); + + NeuralNetConfiguration confNoReg = NeuralNetConfiguration.builder().seed(12345) + .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() + .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) + .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()) + .layer(2, new OutputLayer.Builder() + .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + MultiLayerNetwork netNoReg = new MultiLayerNetwork(confNoReg); + netNoReg.init(); + netNoReg.setParameters(net.params().dup()); + + //Score single example, and compare to scoreExamples: + INDArray input = Nd4j.rand(3, nIn); + INDArray output = Nd4j.rand(3, nOut); + DataSet ds = new DataSet(input, output); + + INDArray scoresWithRegularization = net.scoreExamples(ds, true); + INDArray scoresNoRegularization = net.scoreExamples(ds, false); + + assertArrayEquals(new long[]{3, 1}, scoresWithRegularization.shape()); + assertArrayEquals(new long[]{3, 1}, scoresNoRegularization.shape()); + + for (int i = 0; i < 3; i++) { + DataSet singleEx = new DataSet(input.getRow(i, true), output.getRow(i, true)); + double score = net.score(singleEx); + double scoreNoReg = netNoReg.score(singleEx); + + double scoreUsingScoreExamples = scoresWithRegularization.getDouble(i); + double 
scoreUsingScoreExamplesNoReg = scoresNoRegularization.getDouble(i); + assertEquals(score, scoreUsingScoreExamples, 1e-4); + assertEquals(scoreNoReg, scoreUsingScoreExamplesNoReg, 1e-4); + assertTrue(scoreUsingScoreExamples + > scoreUsingScoreExamplesNoReg); //Regularization term increases score + + // System.out.println(score + "\t" + scoreUsingScoreExamples + "\t|\t" + scoreNoReg + "\t" + scoreUsingScoreExamplesNoReg); + } + } + + @Test + public void testDataSetScore() { + + Nd4j.getRandom().setSeed(12345); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .weightInit(WeightInit.XAVIER).seed(12345L).list() + .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.SIGMOID).build()) + .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(3).nOut(3).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray in = Nd4j.create(new double[]{1.0, 2.0, 3.0, 4.0}, 1, 4); + INDArray out = Nd4j.create(new double[]{1, 0, 0}, 1, 3); + + double score = net.score(new DataSet(in, out)); + } + + @Test + public void testDataSetScoreCNN() { + + int miniBatch = 3; + int depth = 2; + int width = 3; + int height = 3; + int nOut = 2; + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .seed(12345L).list().layer(0, new ConvolutionLayer.Builder(2, 2).nOut(1).build()) + .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(2).build()) + .inputType(InputType.convolutionalFlat(height, width, depth)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + Nd4j.getRandom().setSeed(12345); + Random r = new Random(12345); + INDArray input = Nd4j.rand(miniBatch, depth * width * height); + INDArray labels = Nd4j.create(miniBatch, nOut); + for (int i = 0; i < miniBatch; i++) { + labels.putScalar(new int[]{i, r.nextInt(nOut)}, 1.0); } - @Test - public void testLayerNames() { - int nIn = 10; - int nOut = 40; + double score = net.score(new DataSet(input, labels)); + } - List layerNameList = new ArrayList<>(); - layerNameList.add("dnn1"); - layerNameList.add("dnn2"); - layerNameList.add("dnn3"); + @Test + public void testPredict() throws Exception { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(new Sgd(0.1)).list() - .layer(0, new DenseLayer.Builder().name("dnn1").nIn(nIn).nOut(20).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, new DenseLayer.Builder().name("dnn2").nIn(20).nOut(30).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(2, new DenseLayer.Builder().name("dnn3").nIn(30).nOut(nOut) - .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER).build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); + Nd4j.getRandom().setSeed(12345); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .weightInit(WeightInit.XAVIER).seed(12345L).list() + .layer(0, new DenseLayer.Builder().nIn(784).nOut(50).activation(Activation.RELU).build()) + .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(50).nOut(10).build()) + .inputType(InputType.convolutional(28, 28, 1)).build(); - assertEquals(layerNameList.get(0), net.getLayer(0).conf().getLayer().getLayerName()); - assertEquals(layerNameList, net.getLayerNames()); - BaseLayer b = (BaseLayer) net.getLayer(layerNameList.get(2)).conf().getLayer(); - 
assertEquals("softmax", b.getActivationFn().toString()); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + DataSetIterator ds = new MnistDataSetIterator(10, 10); + net.fit(ds); + + DataSetIterator testDs = new MnistDataSetIterator(1, 1); + DataSet testData = testDs.next(); + testData.setLabelNames(Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); + String actualLables = testData.getLabelName(0); + List prediction = net.predict(testData); + assertNotNull(actualLables); + assertNotNull(prediction.get(0)); + } + + @Test + //@Ignore + public void testCid() throws Exception { + System.out.println(EnvironmentUtils.buildCId()); + + Environment environment = EnvironmentUtils.buildEnvironment(); + environment.setSerialVersionID(EnvironmentUtils.buildCId()); + + Task task = TaskUtils.buildTask(Nd4j.create(new double[]{1, 2, 3, 4, 5, 6}, 1, 6)); + + Heartbeat.getInstance().reportEvent(Event.STANDALONE, environment, task); + + Thread.sleep(25000); + } + + @Test + public void testOutput() throws Exception { + Nd4j.getRandom().setSeed(12345); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .weightInit(WeightInit.XAVIER).seed(12345L).list() + .layer(0, new DenseLayer.Builder().nIn(784).nOut(50).activation(Activation.RELU).build()) + .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(50).nOut(10).build()) + .inputType(InputType.convolutional(28, 28, 1)).build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + DataSetIterator fullData = new MnistDataSetIterator(1, 2); + net.fit(fullData); + + fullData.reset(); + DataSet expectedSet = fullData.next(2); + INDArray expectedOut = net.output(expectedSet.getFeatures(), false); + + fullData.reset(); + + INDArray actualOut = net.output(fullData); + + assertEquals(expectedOut, actualOut); + } + + @Test + public void testGradientUpdate() throws Exception { + DataSetIterator iter = new IrisDataSetIterator(1, 1); + + Gradient expectedGradient = new DefaultGradient(); + expectedGradient.setGradientFor("0_W", Nd4j.ones(4, 5)); + expectedGradient.setGradientFor("0_b", Nd4j.ones(1, 5)); + expectedGradient.setGradientFor("1_W", Nd4j.ones(5, 3)); + expectedGradient.setGradientFor("1_b", Nd4j.ones(1, 3)); + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new Sgd(1.0)) + .activation(Activation.RELU).weightInit(WeightInit.XAVIER) + .list().layer(0, new DenseLayer.Builder().name("dnn1").nIn(4).nOut(5).build()) + .layer(1, new OutputLayer.Builder().name("output").nIn(5).nOut(3) + .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER) + .build()) + .build(); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + net.fit(iter.next()); + // TODO validate actual layer gradientView - issue getting var out of BaseLayer w/o adding MLN getter that gets confused with local gradient vars + Gradient actualGradient = net.gradient; + assertNotEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W")); + + net.update(expectedGradient); + actualGradient = net.gradient; + assertEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W")); + + // Update params with set + net.setParam("0_W", Nd4j.ones(4, 5)); + net.setParam("0_b", Nd4j.ones(1, 5)); + net.setParam("1_W", Nd4j.ones(5, 3)); + net.setParam("1_b", Nd4j.ones(1, 3)); + INDArray actualParams = net.params(); + + // Confirm params + assertEquals(expectedGradient.gradient(), actualParams); + + 
net.update(expectedGradient); + actualParams = net.params(); + assertEquals(Nd4j.ones(1, 43).addi(1), actualParams); + } + + + @Test + public void testCnnInvalidData() { + assertThrows(DL4JException.class, () -> { + int miniBatch = 3; + int depth = 2; + int width = 5; + int height = 5; + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() + .layer(0, + new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0).nIn(2) + .nOut(2).build()) + .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(2).build()) + .inputType(InputType.convolutional(height, width, depth)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray inputWrongDepth = Nd4j.rand(miniBatch, 5, height, + width); //Order: examples, channels, height, width + net.feedForward(inputWrongDepth); + }); + } + + @Test + public void testApplyingPreTrainConfigAndParams() { + int nIn = 10; + int nOut = 10; + + // Test pretrain true + MultiLayerNetwork aePre = getAeModel(true, nIn, nOut); + int actualNP = (int) aePre.numParams(); + assertEquals(2 * (nIn * nOut + nOut) + nIn, actualNP); + INDArray params = aePre.params(); + assertEquals(params.length(), actualNP); // check num params + Map paramTable = aePre.getParamTable(); + assertTrue(paramTable.containsKey("0_vb")); // check vb exists for pretrain layer + aePre.setParam("0_vb", Nd4j.ones(10)); + params = aePre.getParam("0_vb"); + assertEquals(Nd4j.ones(1, 10), params); // check set params for vb + + // Test pretrain false, expect same for true because its not changed when applying update + MultiLayerNetwork aeNoPre = getAeModel(false, nIn, nOut); + actualNP = (int) aeNoPre.numParams(); + assertEquals(2 * (nIn * nOut + nOut) + nIn, actualNP); + params = aeNoPre.params(); + assertEquals(params.length(), actualNP); + paramTable = aePre.getParamTable(); + assertTrue(paramTable.containsKey("0_vb")); + } + + public MultiLayerNetwork getAeModel(boolean preTrain, int nIn, int nOut) { + NeuralNetConfiguration vae = NeuralNetConfiguration.builder() + .seed(42).updater(new NoOp()) + .weightInit(WeightInit.UNIFORM) + .layer(new AutoEncoder.Builder() + .activation(Activation.IDENTITY).nOut(nIn).build()) + .layer( + new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + LossFunctions.LossFunction.COSINE_PROXIMITY) + .activation(Activation.IDENTITY).nOut(nOut) + .build()) + + .inputType(InputType.feedForward(nOut)).build(); + MultiLayerNetwork network = new MultiLayerNetwork(vae); + network.init(); + return network; + } + + + @Test + public void testIterationCountAndPersistence() throws IOException { + Nd4j.getRandom().setSeed(123); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) + .list() + .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) + .build()) + .build(); + + MultiLayerNetwork network = new MultiLayerNetwork(conf); + network.init(); + + DataSetIterator iter = new IrisDataSetIterator(50, 150); + + assertEquals(0, network.getConfiguration().getIterationCount()); + network.fit(iter); + assertEquals(3, network.getConfiguration().getIterationCount()); + iter.reset(); + network.fit(iter); + assertEquals(6, 
network.getConfiguration().getIterationCount()); + iter.reset(); + network.fit(iter.next()); + assertEquals(7, network.getConfiguration().getIterationCount()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ModelSerializer.writeModel(network, baos, true); + byte[] asBytes = baos.toByteArray(); + + ByteArrayInputStream bais = new ByteArrayInputStream(asBytes); + MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(bais, true); + assertEquals(7, net.getConfiguration().getIterationCount()); + } + + + @Test + public void testBiasL1L2() { + + Nd4j.getRandom().setSeed(123); + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .weightInit(WeightInit.XAVIER).activation(Activation.TANH).seed(123).list() + .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nIn(10).nOut(10) + .build()) + .build(); + + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .l1Bias(0.1).l2Bias(0.2).weightInit(WeightInit.XAVIER).activation(Activation.TANH) + .seed(123).list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nIn(10).nOut(10) + .build()) + .build(); + + MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); + net1.init(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + BaseLayer bl0 = (BaseLayer) net2.getLayer(0).getLayerConfiguration(); + assertEquals(0.1, TestUtils.getL1(bl0.getRegularizationBias()), 1e-6); + assertEquals(0.2, TestUtils.getL2(bl0.getRegularizationBias()), 1e-6); + + INDArray features = Nd4j.rand(10, 10); + INDArray labels = Nd4j.rand(10, 10); + + net2.setParams(net1.params().dup()); + + net1.setInput(features); + net1.setLabels(labels); + net2.setInput(features); + net2.setLabels(labels); + + net1.computeGradientAndScore(); + net2.computeGradientAndScore(); + + double r = net1.calcRegularizationScore(true); + assertEquals(0.0, r, 0.0); + + r = net2.calcRegularizationScore(true); + assertEquals(0.0, r, 0.0); + + double s1 = net1.score(); + double s2 = net2.score(); + assertEquals(s1, s2, 1e-6); //Biases initialized to 0 -> should initially have same score + + for (int i = 0; i < 10; i++) { + net1.fit(features, labels); } + net2.setParams(net1.params().dup()); + net1.computeGradientAndScore(); + net2.computeGradientAndScore(); - @Test - public void testScoreExamples() { - Nd4j.getRandom().setSeed(12345); - int nIn = 5; - int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) - .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() - .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) - .build(); + r = net1.calcRegularizationScore(true); + assertEquals(0.0, r, 0.0); - MultiLayerConfiguration confNoReg = new NeuralNetConfiguration.Builder().seed(12345) - .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) - 
.layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() - .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) - .build(); + r = net2.calcRegularizationScore(true); + assertTrue(r > 0.0); + s1 = net1.score(); + s2 = net2.score(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); + assertNotEquals(s1, s2, 1e-6); //Scores should differ due to bias l1/l2 - MultiLayerNetwork netNoReg = new MultiLayerNetwork(confNoReg); - netNoReg.init(); - netNoReg.setParameters(net.params().dup()); - - //Score single example, and compare to scoreExamples: - INDArray input = Nd4j.rand(3, nIn); - INDArray output = Nd4j.rand(3, nOut); - DataSet ds = new DataSet(input, output); - - INDArray scoresWithRegularization = net.scoreExamples(ds, true); - INDArray scoresNoRegularization = net.scoreExamples(ds, false); - - assertArrayEquals(new long[] {3, 1}, scoresWithRegularization.shape()); - assertArrayEquals(new long[] {3, 1}, scoresNoRegularization.shape()); - - for (int i = 0; i < 3; i++) { - DataSet singleEx = new DataSet(input.getRow(i,true), output.getRow(i,true)); - double score = net.score(singleEx); - double scoreNoReg = netNoReg.score(singleEx); - - double scoreUsingScoreExamples = scoresWithRegularization.getDouble(i); - double scoreUsingScoreExamplesNoReg = scoresNoRegularization.getDouble(i); - assertEquals(score, scoreUsingScoreExamples, 1e-4); - assertEquals(scoreNoReg, scoreUsingScoreExamplesNoReg, 1e-4); - assertTrue(scoreUsingScoreExamples > scoreUsingScoreExamplesNoReg); //Regularization term increases score - - // System.out.println(score + "\t" + scoreUsingScoreExamples + "\t|\t" + scoreNoReg + "\t" + scoreUsingScoreExamplesNoReg); - } + for (int i = 0; i < 2; i++) { + assertEquals(0.0, net1.getLayer(i).calcRegularizationScore(true), 0.0); + assertTrue(net2.getLayer(i).calcRegularizationScore(true) > 0.0); } - - @Test - public void testDataSetScore() { - - Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .weightInit(WeightInit.XAVIER).seed(12345L).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.SIGMOID).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(3).nOut(3).build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - INDArray in = Nd4j.create(new double[] {1.0, 2.0, 3.0, 4.0}, 1, 4); - INDArray out = Nd4j.create(new double[] {1, 0, 0}, 1,3); - - double score = net.score(new DataSet(in, out)); - } - - @Test - public void testDataSetScoreCNN() { - - int miniBatch = 3; - int depth = 2; - int width = 3; - int height = 3; - int nOut = 2; - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .seed(12345L).list().layer(0, new ConvolutionLayer.Builder(2, 2).nOut(1).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nOut(2).build()) - .setInputType(InputType.convolutionalFlat(height, width, depth)) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - Nd4j.getRandom().setSeed(12345); - Random r = new Random(12345); - INDArray input = Nd4j.rand(miniBatch, depth * width * height); - INDArray labels = Nd4j.create(miniBatch, nOut); - for (int i = 0; i < miniBatch; i++) { - labels.putScalar(new int[] {i, r.nextInt(nOut)}, 1.0); - } - - double score = net.score(new DataSet(input, labels)); - } - - @Test - public void 
testPredict() throws Exception { - - Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .weightInit(WeightInit.XAVIER).seed(12345L).list() - .layer(0, new DenseLayer.Builder().nIn(784).nOut(50).activation(Activation.RELU).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(50).nOut(10).build()) - .setInputType(InputType.convolutional(28, 28, 1)).build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - DataSetIterator ds = new MnistDataSetIterator(10, 10); - net.fit(ds); - - DataSetIterator testDs = new MnistDataSetIterator(1, 1); - DataSet testData = testDs.next(); - testData.setLabelNames(Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")); - String actualLables = testData.getLabelName(0); - List prediction = net.predict(testData); - assertNotNull(actualLables); - assertNotNull(prediction.get(0)); - } - - @Test - //@Ignore - public void testCid() throws Exception { - System.out.println(EnvironmentUtils.buildCId()); - - Environment environment = EnvironmentUtils.buildEnvironment(); - environment.setSerialVersionID(EnvironmentUtils.buildCId()); - - Task task = TaskUtils.buildTask(Nd4j.create(new double[] {1, 2, 3, 4, 5, 6}, 1,6)); - - Heartbeat.getInstance().reportEvent(Event.STANDALONE, environment, task); - - Thread.sleep(25000); - } - - @Test - public void testOutput() throws Exception { - Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .weightInit(WeightInit.XAVIER).seed(12345L).list() - .layer(0, new DenseLayer.Builder().nIn(784).nOut(50).activation(Activation.RELU).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(50).nOut(10).build()) - .setInputType(InputType.convolutional(28, 28, 1)).build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - DataSetIterator fullData = new MnistDataSetIterator(1, 2); - net.fit(fullData); - - - fullData.reset(); - DataSet expectedSet = fullData.next(2); - INDArray expectedOut = net.output(expectedSet.getFeatures(), false); - - fullData.reset(); - - INDArray actualOut = net.output(fullData); - - assertEquals(expectedOut, actualOut); - } - - @Test - public void testGradientUpdate() throws Exception { - DataSetIterator iter = new IrisDataSetIterator(1, 1); - - Gradient expectedGradient = new DefaultGradient(); - expectedGradient.setGradientFor("0_W", Nd4j.ones(4, 5)); - expectedGradient.setGradientFor("0_b", Nd4j.ones(1, 5)); - expectedGradient.setGradientFor("1_W", Nd4j.ones(5, 3)); - expectedGradient.setGradientFor("1_b", Nd4j.ones(1, 3)); - - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new Sgd(1.0)) - .activation(Activation.RELU).weightInit(WeightInit.XAVIER) - .list().layer(0, new DenseLayer.Builder().name("dnn1").nIn(4).nOut(5).build()) - .layer(1, new OutputLayer.Builder().name("output").nIn(5).nOut(3) - .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER) - .build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - net.fit(iter.next()); - // TODO validate actual layer gradientView - issue getting var out of BaseLayer w/o adding MLN getter that gets confused with local gradient vars - Gradient actualGradient = net.gradient; - assertNotEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W")); - - net.update(expectedGradient); - 
actualGradient = net.gradient; - assertEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W")); - - // Update params with set - net.setParam("0_W", Nd4j.ones(4, 5)); - net.setParam("0_b", Nd4j.ones(1, 5)); - net.setParam("1_W", Nd4j.ones(5, 3)); - net.setParam("1_b", Nd4j.ones(1, 3)); - INDArray actualParams = net.params(); - - // Confirm params - assertEquals(expectedGradient.gradient(), actualParams); - - net.update(expectedGradient); - actualParams = net.params(); - assertEquals(Nd4j.ones(1, 43).addi(1), actualParams); - } - - - @Test - public void testCnnInvalidData() { - assertThrows(DL4JException.class, () -> { - int miniBatch = 3; - int depth = 2; - int width = 5; - int height = 5; - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() - .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0).nIn(2) - .nOut(2).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nOut(2).build()) - .setInputType(InputType.convolutional(height, width, depth)) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - INDArray inputWrongDepth = Nd4j.rand(miniBatch, 5, height, width); //Order: examples, channels, height, width - net.feedForward(inputWrongDepth); - }); - } - - @Test - public void testApplyingPreTrainConfigAndParams() { - int nIn = 10; - int nOut = 10; - - // Test pretrain true - MultiLayerNetwork aePre = getAeModel(true, nIn, nOut); - int actualNP = (int)aePre.numParams(); - assertEquals(2 * (nIn * nOut + nOut) + nIn, actualNP); - INDArray params = aePre.params(); - assertEquals(params.length(), actualNP); // check num params - Map paramTable = aePre.paramTable(); - assertTrue(paramTable.containsKey("0_vb")); // check vb exists for pretrain layer - aePre.setParam("0_vb", Nd4j.ones(10)); - params = aePre.getParam("0_vb"); - assertEquals(Nd4j.ones(1,10), params); // check set params for vb - - - // Test pretrain false, expect same for true because its not changed when applying update - MultiLayerNetwork aeNoPre = getAeModel(false, nIn, nOut); - actualNP = (int)aeNoPre.numParams(); - assertEquals(2 * (nIn * nOut + nOut) + nIn, actualNP); - params = aeNoPre.params(); - assertEquals(params.length(), actualNP); - paramTable = aePre.paramTable(); - assertTrue(paramTable.containsKey("0_vb")); - } - - public MultiLayerNetwork getAeModel(boolean preTrain, int nIn, int nOut) { - MultiLayerConfiguration vae = new NeuralNetConfiguration.Builder() - .seed(42).updater(new NoOp()) - .weightInit(WeightInit.UNIFORM) - .list(new AutoEncoder.Builder() - .activation(Activation.IDENTITY).nOut(nIn).build(), - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.COSINE_PROXIMITY) - .activation(Activation.IDENTITY).nOut(nOut) - .build()) - .setInputType(InputType.feedForward(nOut)).build(); - MultiLayerNetwork network = new MultiLayerNetwork(vae); - network.init(); - return network; - } - - - @Test - public void testIterationCountAndPersistence() throws IOException { - Nd4j.getRandom().setSeed(123); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) - .list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - 
LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) - .build()) - .build(); - - - MultiLayerNetwork network = new MultiLayerNetwork(conf); - network.init(); - - DataSetIterator iter = new IrisDataSetIterator(50, 150); - - assertEquals(0, network.getLayerWiseConfigurations().getIterationCount()); - network.fit(iter); - assertEquals(3, network.getLayerWiseConfigurations().getIterationCount()); - iter.reset(); - network.fit(iter); - assertEquals(6, network.getLayerWiseConfigurations().getIterationCount()); - iter.reset(); - network.fit(iter.next()); - assertEquals(7, network.getLayerWiseConfigurations().getIterationCount()); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ModelSerializer.writeModel(network, baos, true); - byte[] asBytes = baos.toByteArray(); - - ByteArrayInputStream bais = new ByteArrayInputStream(asBytes); - MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(bais, true); - assertEquals(7, net.getLayerWiseConfigurations().getIterationCount()); - } - - - @Test - public void testBiasL1L2() { - - - Nd4j.getRandom().setSeed(123); - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .weightInit(WeightInit.XAVIER).activation(Activation.TANH).seed(123).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nIn(10).nOut(10) - .build()) - .build(); - - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .l1Bias(0.1).l2Bias(0.2).weightInit(WeightInit.XAVIER).activation(Activation.TANH) - .seed(123).list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nIn(10).nOut(10) - .build()) - .build(); - - MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); - net1.init(); - - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); - net2.init(); - - BaseLayer bl0 = (BaseLayer) net2.getLayer(0).conf().getLayer(); - assertEquals(0.1, TestUtils.getL1(bl0.getRegularizationBias()), 1e-6); - assertEquals(0.2, TestUtils.getL2(bl0.getRegularizationBias()), 1e-6); - - INDArray features = Nd4j.rand(10, 10); - INDArray labels = Nd4j.rand(10, 10); - - net2.setParams(net1.params().dup()); - - net1.setInput(features); - net1.setLabels(labels); - net2.setInput(features); - net2.setLabels(labels); - - net1.computeGradientAndScore(); - net2.computeGradientAndScore(); - - double r = net1.calcRegularizationScore(true); - assertEquals(0.0, r, 0.0); - - r = net2.calcRegularizationScore(true); - assertEquals(0.0, r, 0.0); - - - double s1 = net1.score(); - double s2 = net2.score(); - assertEquals(s1, s2, 1e-6); //Biases initialized to 0 -> should initially have same score - - for (int i = 0; i < 10; i++) { - net1.fit(features, labels); - } - - net2.setParams(net1.params().dup()); - net1.computeGradientAndScore(); - net2.computeGradientAndScore(); - - r = net1.calcRegularizationScore(true); - assertEquals(0.0, r, 0.0); - - r = net2.calcRegularizationScore(true); - assertTrue(r > 0.0); - - s1 = net1.score(); - s2 = net2.score(); - - assertNotEquals(s1, s2, 1e-6); //Scores should differ due to bias l1/l2 - - for (int i = 0; i < 2; i++) { - assertEquals(0.0, 
net1.getLayer(i).calcRegularizationScore(true), 0.0); - assertTrue(net2.getLayer(i).calcRegularizationScore(true) > 0.0); - } - } - - /* - Summary should pick up preprocessors set manually on inputs as well - */ - @Test - public void testSummary() { - int V_WIDTH = 130; - int V_HEIGHT = 130; - int V_NFRAMES = 150; - MultiLayerConfiguration confForArchitecture = - new NeuralNetConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .list() - .layer(0, new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB - .nOut(30).stride(4, 4).activation(Activation.RELU).weightInit( - WeightInit.RELU) - .updater(Updater.ADAGRAD).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30 - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) - .kernelSize(3, 3).stride(2, 2).build()) //(31-3+0)/2+1 = 15 - .layer(2, new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2) - .activation(Activation.RELU).weightInit(WeightInit.RELU) - .updater(Updater.ADAGRAD).build()) //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490 - .layer(3, new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50) - .weightInit(WeightInit.RELU).updater(Updater.ADAGRAD) - .gradientNormalization( - GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).build()) - .layer(4, new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50) - .nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD) - .gradientNormalization( - GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10) - .build()) - .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line - .updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER) - .gradientNormalization( - GradientNormalization.ClipElementWiseAbsoluteValue) - .gradientNormalizationThreshold(10).build()) - .inputPreProcessor(0, new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3)) - .inputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10)) - .inputPreProcessor(4, new FeedForwardToRnnPreProcessor()) - .backpropType(BackpropType.TruncatedBPTT) - .tBPTTForwardLength(V_NFRAMES / 5).tBPTTBackwardLength(V_NFRAMES / 5).build(); - MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(confForArchitecture); - modelExpectedArch.init(); - MultiLayerNetwork modelMow = new TransferLearning.Builder(modelExpectedArch).setFeatureExtractor(2).build(); + } + + /* + Summary should pick up preprocessors set manually on inputs as well + */ + @Test + public void testSummary() { + int V_WIDTH = 130; + int V_HEIGHT = 130; + int V_NFRAMES = 150; + NeuralNetConfiguration confForArchitecture = + NeuralNetConfiguration.builder().seed(12345).l2(0.001) //l2 regularization on all layers + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .list() + .layer(0, new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB + .nOut(30).stride(4, 4).activation(Activation.RELU).weightInit( + WeightInit.RELU) + .updater(Updater.ADAGRAD).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30 + .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .kernelSize(3, 3).stride(2, 2).build()) //(31-3+0)/2+1 = 15 + .layer(2, new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2) + .activation(Activation.RELU).weightInit(WeightInit.RELU) + .updater(Updater.ADAGRAD).build()) //Output: (15-3+0)/2+1 = 
7 -> 7*7*10 = 490 + .layer(3, new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50) + .weightInit(WeightInit.RELU).updater(Updater.ADAGRAD) + .gradientNormalization( + GradientNormalization.ClipElementWiseAbsoluteValue) + .gradientNormalizationThreshold(10).build()) + .layer(4, new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50) + .nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD) + .gradientNormalization( + GradientNormalization.ClipElementWiseAbsoluteValue) + .gradientNormalizationThreshold(10) + .build()) + .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nIn(50) + .nOut(4) //4 possible shapes: circle, square, arc, line + .updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER) + .gradientNormalization( + GradientNormalization.ClipElementWiseAbsoluteValue) + .gradientNormalizationThreshold(10).build()) + .inputPreProcessor(0, new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3)) + .inputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10)) + .inputPreProcessor(4, new FeedForwardToRnnPreProcessor()) + .backpropType(BackpropType.TruncatedBPTT) + .tbpttFwdLength(V_NFRAMES / 5).tbpttBackLength(V_NFRAMES / 5).build(); + MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(confForArchitecture); + modelExpectedArch.init(); + MultiLayerNetwork modelMow = new TransferLearning.Builder( + modelExpectedArch).setFeatureExtractor(2).build(); // System.out.println(modelExpectedArch.summary()); // System.out.println(modelMow.summary()); // System.out.println(modelMow.summary(InputType.recurrent(V_HEIGHT*V_WIDTH*3))); + } + + @Test + public void testErrorNoOutputLayer() { + assertThrows(DL4JException.class, () -> { + NeuralNetConfiguration c = NeuralNetConfiguration.builder().list() + .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).build(); + + MultiLayerNetwork net = new MultiLayerNetwork(c); + net.init(); + + INDArray f = Nd4j.create(1, 10); + INDArray l = Nd4j.create(1, 10); + + net.setInput(f); + net.setLabels(l); + + net.computeGradientAndScore(); + }); + } + + + @Test + public void testSetParamTable() { + + Nd4j.getRandom().setSeed(123); + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder().seed(123).list() + .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(2, new LSTM.Builder().nIn(2).nOut(2).build()) + .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) + .build()) + .build(); + + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(987).list() + .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(2, new LSTM.Builder().nIn(2).nOut(2).build()) + .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) + .build()) + .build(); + + MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); + net1.init(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + assertNotEquals(net1.params(), net2.params()); + assertNotEquals(net1.getParamTable(), 
net2.getParamTable()); + + net1.setParamTable(net2.getParamTable()); + assertEquals(net1.params(), net2.params()); + assertEquals(net1.getParamTable(), net2.getParamTable()); + } + + + @Test + public void testCompareLayerMethods() { + //Simple test: compare .layer(int, ILayer) and .layer(ILayer) are identical + + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder().seed(123).list() + .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(2, new LSTM.Builder().nIn(2).nOut(2).build()) + .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) + .build()) + .build(); + + NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(123).list() + .layer(new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .activation(Activation.TANH).build()) + .layer(new LSTM.Builder().nIn(2).nOut(2).build()) + .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) + .build()) + .build(); + + assertEquals(conf1, conf2); + } + + + @Test + public void testEpochCounter() throws Exception { + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .list() + .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + assertEquals(0, net.getConfiguration().getEpochCount()); + + DataSetIterator iter = new IrisDataSetIterator(150, 150); + + for (int i = 0; i < 4; i++) { + assertEquals(i, net.getConfiguration().getEpochCount()); + net.fit(iter); + assertEquals(i + 1, net.getConfiguration().getEpochCount()); } - @Test - public void testErrorNoOutputLayer() { - assertThrows(DL4JException.class, () -> { - MultiLayerConfiguration c = new NeuralNetConfiguration.Builder().list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).build(); + assertEquals(4, net.getConfiguration().getEpochCount()); - MultiLayerNetwork net = new MultiLayerNetwork(c); - net.init(); + MultiLayerNetwork restored = TestUtils.testModelSerialization(net); + assertEquals(4, restored.getConfiguration().getEpochCount()); + } - INDArray f = Nd4j.create(1, 10); - INDArray l = Nd4j.create(1, 10); + @Test + public void testInputClearance() throws Exception { + //Activations should be cleared - if not, it's possible for out of (workspace) scope arrays to be around + // which can cause a crash + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .convolutionMode(ConvolutionMode.Same) + .list() + .layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(1).nOut(1).build()) + .layer(new SubsamplingLayer.Builder().kernelSize(2, 2).stride(1, 1).build()) + .layer(new DenseLayer.Builder().nOut(10).build()) + .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) + .inputType(InputType.convolutional(28, 28, 1)) + .build(); - net.setInput(f); - net.setLabels(l); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); - net.computeGradientAndScore(); - }); + INDArray content = Nd4j.create(1, 1, 28, 28); + + //Check output: + 
net.output(content); + for (org.deeplearning4j.nn.api.Layer l : net.getLayers()) { + assertNull(l.input()); } - - @Test - public void testSetParamTable() { - - Nd4j.getRandom().setSeed(123); - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(123).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(2, new LSTM.Builder().nIn(2).nOut(2).build()) - .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) - .build()) - .build(); - - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(987).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(2, new LSTM.Builder().nIn(2).nOut(2).build()) - .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) - .build()) - .build(); - - MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); - net1.init(); - - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); - net2.init(); - - assertNotEquals(net1.params(), net2.params()); - assertNotEquals(net1.paramTable(), net2.paramTable()); - - net1.setParamTable(net2.paramTable()); - assertEquals(net1.params(), net2.params()); - assertEquals(net1.paramTable(), net2.paramTable()); + //Check feedForward: + net.feedForward(content, false); + for (org.deeplearning4j.nn.api.Layer l : net.getLayers()) { + assertNull(l.input()); } + } - @Test - public void testCompareLayerMethods(){ - //Simple test: compare .layer(int, ILayer) and .layer(ILayer) are identical + @Test + public void testExternalErrors() { + //Simple test: same network, but in one case: one less layer (the OutputLayer), where the epsilons are passed in externally + // instead. 
Should get identical results - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(123).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(2, new LSTM.Builder().nIn(2).nOut(2).build()) - .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) - .build()) - .build(); + for (WorkspaceMode ws : WorkspaceMode.values()) { + log.info("Workspace mode: " + ws); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(123).list() - .layer(new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) - .activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nIn(2).nOut(2).build()) - .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) - .build()) - .build(); + Nd4j.getRandom().setSeed(12345); + INDArray inData = Nd4j.rand(3, 10); + INDArray outData = Nd4j.rand(3, 10); - assertEquals(conf1, conf2); + Nd4j.getRandom().setSeed(12345); + NeuralNetConfiguration standard = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) + .trainingWorkspaceMode(ws) + .inferenceWorkspaceMode(ws) + .seed(12345).list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10) + .nOut(10).build()) + .build(); + MultiLayerNetwork s = new MultiLayerNetwork(standard); + s.init(); + + Nd4j.getRandom().setSeed(12345); + NeuralNetConfiguration external = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) + .trainingWorkspaceMode(ws) + .inferenceWorkspaceMode(ws) + .seed(12345).list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) + .build(); + + MultiLayerNetwork e = new MultiLayerNetwork(external); + e.init(); + + s.setInput(inData); + s.setLabels(outData); + s.computeGradientAndScore(); + Gradient sGrad = s.gradient(); + + s.setInput(inData); + s.feedForward(true, false); //FF without clearing inputs as we need them later + + e.setInput(inData); + e.feedForward(true, false); //FF without clearing inputs as we need them later + + org.deeplearning4j.nn.layers.OutputLayer ol = (org.deeplearning4j.nn.layers.OutputLayer) s.getLayer( + 1); + Pair olPairStd = ol.backpropGradient(null, + LayerWorkspaceMgr.noWorkspaces()); + + INDArray olEpsilon = olPairStd.getSecond().detach(); + + e.setInput(inData); + e.feedForward(true, false); + Pair extErrorGrad = e.backpropGradient(olEpsilon, + LayerWorkspaceMgr.noWorkspaces()); + + int nParamsDense = 10 * 10 + 10; + assertEquals(sGrad.gradient() + .get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, nParamsDense)), + extErrorGrad.getFirst().gradient()); + + Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); } + } + @Test + public void testExternalErrors2() { + Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.SCOPE_PANIC); + int nIn = 4; + int nOut = 3; - @Test - public void testEpochCounter() throws Exception { - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() - .layer(new 
OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - assertEquals(0, net.getLayerWiseConfigurations().getEpochCount()); - - - DataSetIterator iter = new IrisDataSetIterator(150, 150); - - for( int i=0; i<4; i++ ){ - assertEquals(i, net.getLayerWiseConfigurations().getEpochCount()); - net.fit(iter); - assertEquals(i+1, net.getLayerWiseConfigurations().getEpochCount()); - } - - assertEquals(4, net.getLayerWiseConfigurations().getEpochCount()); - - MultiLayerNetwork restored = TestUtils.testModelSerialization(net); - assertEquals(4, restored.getLayerWiseConfigurations().getEpochCount()); - } - - @Test - public void testInputClearance() throws Exception { - //Activations should be cleared - if not, it's possible for out of (workspace) scope arrays to be around - // which can cause a crash - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .convolutionMode(ConvolutionMode.Same) - .list() - .layer(new ConvolutionLayer.Builder().kernelSize(2,2).stride(1,1).nIn(1).nOut(1).build()) - .layer(new SubsamplingLayer.Builder().kernelSize(2,2).stride(1,1).build()) - .layer(new DenseLayer.Builder().nOut(10).build()) - .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(28,28,1)) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - INDArray content = Nd4j.create(1,1,28,28); - - //Check output: - net.output(content); - for(org.deeplearning4j.nn.api.Layer l : net.getLayers()){ - assertNull(l.input()); - } - - //Check feedForward: - net.feedForward(content, false); - for(org.deeplearning4j.nn.api.Layer l : net.getLayers()){ - assertNull(l.input()); - } - } - - - @Test - public void testExternalErrors() { - //Simple test: same network, but in one case: one less layer (the OutputLayer), where the epsilons are passed in externally - // instead. 
Should get identical results - - for(WorkspaceMode ws : WorkspaceMode.values()) { - log.info("Workspace mode: " + ws); - - Nd4j.getRandom().setSeed(12345); - INDArray inData = Nd4j.rand(3, 10); - INDArray outData = Nd4j.rand(3, 10); - - Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration standard = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) - .trainingWorkspaceMode(ws) - .inferenceWorkspaceMode(ws) - .seed(12345).list() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10) - .nOut(10).build()) - .build(); - MultiLayerNetwork s = new MultiLayerNetwork(standard); - s.init(); - - - Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration external = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) - .trainingWorkspaceMode(ws) - .inferenceWorkspaceMode(ws) - .seed(12345).list() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .build(); - - MultiLayerNetwork e = new MultiLayerNetwork(external); - e.init(); - - s.setInput(inData); - s.setLabels(outData); - s.computeGradientAndScore(); - Gradient sGrad = s.gradient(); - - s.setInput(inData); - s.feedForward(true, false); //FF without clearing inputs as we need them later - - e.setInput(inData); - e.feedForward(true, false); //FF without clearing inputs as we need them later - - org.deeplearning4j.nn.layers.OutputLayer ol = (org.deeplearning4j.nn.layers.OutputLayer) s.getLayer(1); - Pair olPairStd = ol.backpropGradient(null, LayerWorkspaceMgr.noWorkspaces()); - - INDArray olEpsilon = olPairStd.getSecond().detach(); - - e.setInput(inData); - e.feedForward(true, false); - Pair extErrorGrad = e.backpropGradient(olEpsilon, LayerWorkspaceMgr.noWorkspaces()); - - int nParamsDense = 10 * 10 + 10; - assertEquals(sGrad.gradient().get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nParamsDense)), - extErrorGrad.getFirst().gradient()); - - Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); - } - } - - @Test - public void testExternalErrors2(){ - Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.SCOPE_PANIC); - int nIn = 4; - int nOut = 3; - - for(WorkspaceMode ws : WorkspaceMode.values()) { + for (WorkspaceMode ws : WorkspaceMode.values()) { // System.out.println("***** WORKSPACE: " + ws); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(new Adam(0.01)) - .trainingWorkspaceMode(ws) - .inferenceWorkspaceMode(ws) - .list() - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.RELU).build()) - .layer(new ActivationLayer.Builder().activation(Activation.IDENTITY).build()) - .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) - .inputPreProcessor(1, new FeedForwardToRnnPreProcessor()) - .build(); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .updater(new Adam(0.01)) + .trainingWorkspaceMode(ws) + .inferenceWorkspaceMode(ws) + .list() + .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.RELU).build()) + .layer(new ActivationLayer.Builder().activation(Activation.IDENTITY).build()) + .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) + .inputPreProcessor(1, new FeedForwardToRnnPreProcessor()) + .build(); - MultiLayerNetwork graph = new MultiLayerNetwork(conf); - graph.init(); + MultiLayerNetwork graph = new MultiLayerNetwork(conf); + graph.init(); - final int minibatch = 5; - final int seqLen = 6; + final int minibatch = 5; + final int seqLen = 6; - INDArray param = 
Nd4j.create(new double[]{0.54, 0.31, 0.98, -0.30, -0.66, -0.19, -0.29, -0.62, 0.13, -0.32, 0.01, -0.03, 0.00, 0.00, 0.00}).reshape(1, -1); - graph.setParams(param); + INDArray param = Nd4j.create( + new double[]{0.54, 0.31, 0.98, -0.30, -0.66, -0.19, -0.29, -0.62, 0.13, -0.32, 0.01, + -0.03, 0.00, 0.00, 0.00}).reshape(1, -1); + graph.setParams(param); - INDArray input = Nd4j.rand(new int[]{minibatch, nIn, seqLen}, 12); - INDArray expected = Nd4j.ones(minibatch, nOut, seqLen); + INDArray input = Nd4j.rand(new int[]{minibatch, nIn, seqLen}, 12); + INDArray expected = Nd4j.ones(minibatch, nOut, seqLen); - graph.setInput(input); - INDArray output = graph.feedForward(false, false).get(2); - INDArray error = output.sub(expected); + graph.setInput(input); + INDArray output = graph.feedForward(false, false).get(2); + INDArray error = output.sub(expected); - for (org.deeplearning4j.nn.api.Layer l : graph.getLayers()) { - assertNotNull(l.input()); - assertFalse(l.input().isAttached()); - } + for (org.deeplearning4j.nn.api.Layer l : graph.getLayers()) { + assertNotNull(l.input()); + assertFalse(l.input().isAttached()); + } - // Compute Gradient - Pair gradient = graph.backpropGradient(error, LayerWorkspaceMgr.noWorkspaces()); - graph.getUpdater().update(graph, gradient.getFirst(), 0, 0, minibatch, LayerWorkspaceMgr.noWorkspaces()); + // Compute Gradient + Pair gradient = graph.backpropGradient(error, + LayerWorkspaceMgr.noWorkspaces()); + graph.getUpdater() + .update(graph, gradient.getFirst(), 0, 0, minibatch, LayerWorkspaceMgr.noWorkspaces()); - Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); - } - - Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.DISABLED); + Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); } - @Test - public void testLayerSize(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.DISABLED); + } - .list() - .layer(new ConvolutionLayer.Builder().kernelSize(2,2).nOut(6).build()) - .layer(new SubsamplingLayer.Builder().kernelSize(2,2).build()) - .layer(new DenseLayer.Builder().nOut(30).build()) - .layer(new OutputLayer.Builder().nOut(13).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(28,28,3)) - .build(); + @Test + public void testLayerSize() { + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); + .list() + .layer(new ConvolutionLayer.Builder().kernelSize(2, 2).nOut(6).build()) + .layer(new SubsamplingLayer.Builder().kernelSize(2, 2).build()) + .layer(new DenseLayer.Builder().nOut(30).build()) + .layer(new OutputLayer.Builder().nOut(13).activation(Activation.SOFTMAX).build()) + .inputType(InputType.convolutional(28, 28, 3)) + .build(); - assertEquals(6, net.layerSize(0)); - assertEquals(0, net.layerSize(1)); - assertEquals(30, net.layerSize(2)); - assertEquals(13, net.layerSize(3)); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); - assertEquals(3, net.layerInputSize(0)); - assertEquals(0, net.layerInputSize(1)); - assertEquals(((FeedForwardLayer)net.getLayer(2).conf().getLayer()).getNIn(), net.layerInputSize(2)); - assertEquals(30, net.layerInputSize(3)); + assertEquals(6, net.layerSize(0)); + assertEquals(0, net.layerSize(1)); + assertEquals(30, net.layerSize(2)); + assertEquals(13, net.layerSize(3)); + + assertEquals(3, net.layerInputSize(0)); + assertEquals(0, net.layerInputSize(1)); + 
assertEquals(((FeedForwardLayer) net.getLayer(2).getLayerConfiguration()).getNIn(), + net.layerInputSize(2)); + assertEquals(30, net.layerInputSize(3)); + } + + + @Test + public void testZeroParamNet() throws Exception { + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .list() + .layer(new SubsamplingLayer.Builder().kernelSize(2, 2).stride(2, 2).build()) + .layer(new LossLayer.Builder().activation(Activation.SIGMOID) + .lossFunction(LossFunctions.LossFunction.MSE).build()) + .inputType(InputType.convolutionalFlat(28, 28, 1)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + DataSet ds = new MnistDataSetIterator(16, true, 12345).next(); + + INDArray out = net.output(ds.getFeatures()); + + INDArray labelTemp = Nd4j.create(out.shape()); + ds.setLabels(labelTemp); + + net.fit(ds); + + MultiLayerNetwork net2 = TestUtils.testModelSerialization(net); + INDArray out2 = net2.output(ds.getFeatures()); + assertEquals(out, out2); + } + + + @Test + public void testInputActivationGradient() { + Nd4j.setDataType(DataType.DOUBLE); + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .seed(12345) + .activation(Activation.TANH) + .list() + .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer( + new OutputLayer.Builder().nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE) + .build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray in = Nd4j.rand(1, 10); + INDArray label = Nd4j.rand(1, 10); + + Pair p = net.calculateGradients(in, label, null, null); + + //Quick gradient check: + double eps = 1e-6; + double maxRelError = 1e-5; + for (int i = 0; i < 10; i++) { + double orig = in.getDouble(i); + in.putScalar(i, orig + eps); + double scorePlus = net.score(new DataSet(in, label)); + in.putScalar(i, orig - eps); + double scoreMinus = net.score(new DataSet(in, label)); + in.putScalar(i, orig); + + double expGrad = (scorePlus - scoreMinus) / (2.0 * eps); + double actGrad = p.getSecond().getDouble(i); + + double relError = (Math.abs(expGrad - actGrad)) / (Math.abs(expGrad) + Math.abs(actGrad)); + + String str = i + " - " + relError + " - exp=" + expGrad + ", act=" + actGrad; + assertTrue(relError < maxRelError, str); } + } - @Test - public void testZeroParamNet() throws Exception { + @Test + public void testNeuralNetConfigurationActivationTypes() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() - .layer(new SubsamplingLayer.Builder().kernelSize(2,2).stride(2,2).build()) - .layer(new LossLayer.Builder().activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE).build()) - .setInputType(InputType.convolutionalFlat(28,28,1)) - .build(); + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder() + .list() + .layer(new LSTM.Builder().nOut(6).build()) + .layer(new LSTM.Builder().nOut(7).build()) + .layer(new GlobalPoolingLayer()) + .layer(new OutputLayer.Builder().nOut(8).activation(Activation.SOFTMAX).build()) + .inputType(InputType.recurrent(10)); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); + NeuralNetConfiguration conf = builder.build(); - DataSet ds = new MnistDataSetIterator(16, true, 12345).next(); + List outBuilder = builder.getLayerActivationTypes(); + List outConf = conf.getLayerActivationTypes(InputType.recurrent(10)); - INDArray out = net.output(ds.getFeatures()); + List exp = Arrays.asList( + InputType.recurrent(6), + 
InputType.recurrent(7), + InputType.feedForward(7), + InputType.feedForward(8) + ); - INDArray labelTemp = Nd4j.create(out.shape()); - ds.setLabels(labelTemp); + assertEquals(exp, outBuilder); + assertEquals(exp, outConf); + } - net.fit(ds); + @Test + public void testMultipleEpochsSimple() { + //Mainly a simple sanity check on the preconditions in the method... + DataSetIterator iter = new IrisDataSetIterator(10, 150); - MultiLayerNetwork net2 = TestUtils.testModelSerialization(net); - INDArray out2 = net2.output(ds.getFeatures()); - assertEquals(out, out2); - } + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .list() + .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build()) + .build(); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + net.fit(iter, 3); - @Test - public void testInputActivationGradient(){ - Nd4j.setDataType(DataType.DOUBLE); + ComputationGraph g = net.toComputationGraph(); + g.fit(iter, 3); + } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .dataType(DataType.DOUBLE) - .seed(12345) - .activation(Activation.TANH) - .list() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); + @Test + public void testPretrainFitMethods() { - INDArray in = Nd4j.rand(1, 10); - INDArray label = Nd4j.rand(1, 10); + //The fit methods should *not* do layerwise pretraining: - Pair p = net.calculateGradients(in, label, null, null); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - //Quick gradient check: - double eps = 1e-6; - double maxRelError = 1e-5; - for( int i=0; i<10; i++ ){ - double orig = in.getDouble(i); - in.putScalar(i, orig + eps); - double scorePlus = net.score(new DataSet(in, label)); - in.putScalar(i, orig - eps); - double scoreMinus = net.score(new DataSet(in, label)); - in.putScalar(i, orig); + .list() + .layer(new VariationalAutoencoder.Builder() + .nIn(10).nOut(10).encoderLayerSizes(10).decoderLayerSizes(10).build()) + .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) - double expGrad = (scorePlus - scoreMinus) / (2.0 * eps); - double actGrad = p.getSecond().getDouble(i); + .build(); - double relError = (Math.abs(expGrad - actGrad)) / (Math.abs(expGrad) + Math.abs(actGrad)); - - String str = i + " - " + relError + " - exp=" + expGrad + ", act=" + actGrad; - assertTrue(relError < maxRelError, str); - } - } - - - @Test - public void testMultiLayerConfigurationActivationTypes(){ - - NeuralNetConfiguration.ListBuilder builder = new NeuralNetConfiguration.Builder() - .list() - .layer(new LSTM.Builder().nOut(6).build()) - .layer(new LSTM.Builder().nOut(7).build()) - .layer(new GlobalPoolingLayer()) - .layer(new OutputLayer.Builder().nOut(8).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.recurrent(10)); - - MultiLayerConfiguration conf = builder.build(); - - List outBuilder = builder.getLayerActivationTypes(); - List outConf = conf.getLayerActivationTypes(InputType.recurrent(10)); - - List exp = Arrays.asList( - InputType.recurrent(6), - InputType.recurrent(7), - InputType.feedForward(7), - InputType.feedForward(8) - ); - - - assertEquals(exp, outBuilder); - assertEquals(exp, outConf); - } - - @Test - public void testMultipleEpochsSimple(){ - //Mainly a simple sanity check on the preconditions in the method... 
- DataSetIterator iter = new IrisDataSetIterator(10, 150); - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .list() - .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - net.fit(iter, 3); - - ComputationGraph g = net.toComputationGraph(); - g.fit(iter, 3); - } - - - @Test - public void testPretrainFitMethods(){ - - //The fit methods should *not* do layerwise pretraining: - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - - .list() - .layer(new VariationalAutoencoder.Builder() - .nIn(10).nOut(10).encoderLayerSizes(10).decoderLayerSizes(10).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) - - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - Set> exp = new HashSet<>(); - exp.add(MultiLayerNetwork.class); - - CheckModelsListener listener = new CheckModelsListener(); - net.setListeners(listener); - - INDArray f = Nd4j.create(1,10); - INDArray l = Nd4j.create(1,10); - DataSet ds = new DataSet(f,l); - MultiDataSet mds = new org.nd4j.linalg.dataset.MultiDataSet(f,l); - - DataSetIterator iter = new ExistingDataSetIterator(Collections.singletonList(ds)); - net.fit(iter); - assertEquals(exp, listener.getModelClasses()); - - net.fit(ds); - assertEquals(exp, listener.getModelClasses()); - - net.fit(f, l); - assertEquals(exp, listener.getModelClasses()); - - net.fit(f, l, null, null); - assertEquals(exp, listener.getModelClasses()); - - net.fit(mds); - assertEquals(exp, listener.getModelClasses()); - - net.fit(new SingletonMultiDataSetIterator(mds)); - assertEquals(exp, listener.getModelClasses()); - } - - @Test - public void testINDArrayConfigCloning(){ - //INDArrays in config should be cloned to avoid threading issues - - int mb = 3; - int b = 4; - int c = 3; - int depth = b * (5 + c); - int w = 6; - int h = 6; - - INDArray bbPrior = Nd4j.rand(b, 2).muliRowVector(Nd4j.create(new double[]{w, h}).castTo(Nd4j.defaultFloatingPointType())); - - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .l2(0.01) - .list() - .layer(new ConvolutionLayer.Builder().nIn(depth).nOut(depth).kernelSize(1,1).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPrior) - .build()) - .build(); - - MultiLayerConfiguration conf2 = conf.clone(); - - INDArray bb1 = ((Yolo2OutputLayer)conf.getConf(1).getLayer()).getBoundingBoxes(); - INDArray bb2 = ((Yolo2OutputLayer)conf2.getConf(1).getLayer()).getBoundingBoxes(); - assertNotSame(bb1, bb2); - - assertEquals(bb1, bb2); - } - - @Data - @EqualsAndHashCode(callSuper = false) - public static class CheckModelsListener extends BaseTrainingListener { - - private Set> modelClasses = new HashSet<>(); - - @Override - public void iterationDone(Model model, int iteration, int epoch) { - modelClasses.add(model.getClass()); - } - } - - - @Test - public void testMLNUpdaterBlocks(){ - //Check that setting learning rate results in correct rearrangement of updater state within updater blocks - //https://github.com/deeplearning4j/deeplearning4j/issues/6809#issuecomment-463892644 - - double lr = 1e-3; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .seed(12345) - .weightInit(WeightInit.XAVIER) - .updater(new Adam(lr)) - .list() - .layer(new DenseLayer.Builder().nIn(5).nOut(3).build()) - .layer(new DenseLayer.Builder().nIn(3).nOut(2).build()) - .layer(new 
OutputLayer.Builder(LossFunctions.LossFunction.XENT).nIn(2).nOut(1) - .activation(Activation.SIGMOID).build()) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - INDArray in = Nd4j.rand(1, 5); - INDArray lbl = Nd4j.rand(1,1); - - net.fit(new DataSet(in, lbl)); - - INDArray viewArray = net.getUpdater().getStateViewArray(); - INDArray viewArrayCopy = viewArray.dup(); - //Initially updater view array is set out like: - //[m0w, m0b, m1w, m1b, m2w, m2b][v0w, v0b, v1w, v1b, v2w, v2b] - long soFar = 0; - INDArray m0w = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+5*3)).assign(0); //m0w - soFar += 5*3; - INDArray m0b = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+3)).assign(1); //m0b - soFar += 3; - INDArray m1w = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+3*2)).assign(2); //m1w - soFar += 3*2; - INDArray m1b = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+2)).assign(3); //m1b - soFar += 2; - INDArray m2w = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+ 2)).assign(4); //m2w - soFar += 2; - INDArray m2b = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+1)).assign(5); //m2b - soFar += 1; - - INDArray v0w = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+5*3)).assign(6); //v0w - soFar += 5*3; - INDArray v0b = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+3)).assign(7); //v0b - soFar += 3; - INDArray v1w = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+3*2)).assign(8); //v1w - soFar += 3*2; - INDArray v1b = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+2)).assign(9); //v1b - soFar += 2; - INDArray v2w = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+ 2)).assign(10); //v2w - soFar += 2; - INDArray v2b = viewArray.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(soFar, soFar+1)).assign(11); //v2b - soFar += 1; - - - net.setLearningRate(0, 0.0); - - //Expect new updater state to look like: - //[m0w, m0b][v0w,v0b], [m1w, m1b, m2w, m2b][v1w, v1b, v2w, v2b] - INDArray exp = Nd4j.concat(1, m0w, m0b, v0w, v0b, - m1w, m1b, m2w, m2b, v1w, v1b, v2w, v2b); - - INDArray act = net.getUpdater().getStateViewArray(); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + Set> exp = new HashSet<>(); + exp.add(MultiLayerNetwork.class); + + CheckModelsListener listener = new CheckModelsListener(); + net.setListeners(listener); + + INDArray f = Nd4j.create(1, 10); + INDArray l = Nd4j.create(1, 10); + DataSet ds = new DataSet(f, l); + MultiDataSet mds = new org.nd4j.linalg.dataset.MultiDataSet(f, l); + + DataSetIterator iter = new ExistingDataSetIterator(Collections.singletonList(ds)); + net.fit(iter); + assertEquals(exp, listener.getModelClasses()); + + net.fit(ds); + assertEquals(exp, listener.getModelClasses()); + + net.fit(f, l); + assertEquals(exp, listener.getModelClasses()); + + net.fit(f, l, null, null); + assertEquals(exp, listener.getModelClasses()); + + net.fit(mds); + assertEquals(exp, listener.getModelClasses()); + + net.fit(new SingletonMultiDataSetIterator(mds)); + assertEquals(exp, listener.getModelClasses()); + } + + @Test + public void testINDArrayConfigCloning() { + //INDArrays in config should be cloned to avoid threading issues + + int mb = 3; + int b 
= 4; + int c = 3; + int depth = b * (5 + c); + int w = 6; + int h = 6; + + INDArray bbPrior = Nd4j.rand(b, 2) + .muliRowVector(Nd4j.create(new double[]{w, h}).castTo(Nd4j.defaultFloatingPointType())); + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .l2(0.01) + .list() + .layer(new ConvolutionLayer.Builder().nIn(depth).nOut(depth).kernelSize(1, 1).build()) + .layer(new Yolo2OutputLayer.Builder() + .boundingBoxPriors(bbPrior) + .build()) + .build(); + + NeuralNetConfiguration conf2 = conf.clone(); + + INDArray bb1 = ((Yolo2OutputLayer) conf.getConf(1).getLayer()).getBoundingBoxes(); + INDArray bb2 = ((Yolo2OutputLayer) conf2.getConf(1).getLayer()).getBoundingBoxes(); + assertNotSame(bb1, bb2); + + assertEquals(bb1, bb2); + } + + @Test + public void testMLNUpdaterBlocks() { + //Check that setting learning rate results in correct rearrangement of updater state within updater blocks + //https://github.com/deeplearning4j/deeplearning4j/issues/6809#issuecomment-463892644 + + double lr = 1e-3; + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .seed(12345) + .weightInit(WeightInit.XAVIER) + .updater(new Adam(lr)) + .list() + .layer(new DenseLayer.Builder().nIn(5).nOut(3).build()) + .layer(new DenseLayer.Builder().nIn(3).nOut(2).build()) + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.XENT).nIn(2).nOut(1) + .activation(Activation.SIGMOID).build()) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray in = Nd4j.rand(1, 5); + INDArray lbl = Nd4j.rand(1, 1); + + net.fit(new DataSet(in, lbl)); + + INDArray viewArray = net.getUpdater().getStateViewArray(); + INDArray viewArrayCopy = viewArray.dup(); + //Initially updater view array is set out like: + //[m0w, m0b, m1w, m1b, m2w, m2b][v0w, v0b, v1w, v1b, v2w, v2b] + long soFar = 0; + INDArray m0w = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 5 * 3)).assign(0); //m0w + soFar += 5 * 3; + INDArray m0b = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 3)).assign(1); //m0b + soFar += 3; + INDArray m1w = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 3 * 2)).assign(2); //m1w + soFar += 3 * 2; + INDArray m1b = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 2)).assign(3); //m1b + soFar += 2; + INDArray m2w = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 2)).assign(4); //m2w + soFar += 2; + INDArray m2b = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 1)).assign(5); //m2b + soFar += 1; + + INDArray v0w = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 5 * 3)).assign(6); //v0w + soFar += 5 * 3; + INDArray v0b = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 3)).assign(7); //v0b + soFar += 3; + INDArray v1w = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 3 * 2)).assign(8); //v1w + soFar += 3 * 2; + INDArray v1b = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 2)).assign(9); //v1b + soFar += 2; + INDArray v2w = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 2)).assign(10); //v2w + soFar += 2; + INDArray v2b = viewArray.get(NDArrayIndex.interval(0, 0, true), + NDArrayIndex.interval(soFar, soFar + 1)).assign(11); //v2b + soFar += 1; + 
+ net.setLearningRate(0, 0.0); + + //Expect new updater state to look like: + //[m0w, m0b][v0w,v0b], [m1w, m1b, m2w, m2b][v1w, v1b, v2w, v2b] + INDArray exp = Nd4j.concat(1, m0w, m0b, v0w, v0b, + m1w, m1b, m2w, m2b, v1w, v1b, v2w, v2b); + + INDArray act = net.getUpdater().getStateViewArray(); // System.out.println(exp); // System.out.println(act); - assertEquals(exp, act); + assertEquals(exp, act); - //And set layer 1 LR: - net.setLearningRate(1, 0.2); - exp = Nd4j.concat(1, m0w, m0b, v0w, v0b, - m1w, m1b, v1w, v1b, - m2w, m2b, v2w, v2b); - assertEquals(exp, net.getUpdater().getStateViewArray()); + //And set layer 1 LR: + net.setLearningRate(1, 0.2); + exp = Nd4j.concat(1, m0w, m0b, v0w, v0b, + m1w, m1b, v1w, v1b, + m2w, m2b, v2w, v2b); + assertEquals(exp, net.getUpdater().getStateViewArray()); + //Set all back to original LR and check again: + net.setLearningRate(1, lr); + net.setLearningRate(0, lr); - //Set all back to original LR and check again: - net.setLearningRate(1, lr); - net.setLearningRate(0, lr); + exp = Nd4j.concat(1, m0w, m0b, m1w, m1b, m2w, m2b, v0w, v0b, v1w, v1b, v2w, v2b); + assertEquals(exp, net.getUpdater().getStateViewArray()); - exp = Nd4j.concat(1, m0w, m0b, m1w, m1b, m2w, m2b, v0w, v0b, v1w, v1b, v2w, v2b); - assertEquals(exp, net.getUpdater().getStateViewArray()); + //Finally, training sanity check (if things are wrong, we get -ve values in adam V, which causes NaNs) + net.getUpdater().getStateViewArray().assign(viewArrayCopy); + net.setLearningRate(0, 0.0); + Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.NAN_PANIC); + net.fit(new DataSet(in, lbl)); + Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.SCOPE_PANIC); + } - //Finally, training sanity check (if things are wrong, we get -ve values in adam V, which causes NaNs) - net.getUpdater().getStateViewArray().assign(viewArrayCopy); - net.setLearningRate(0, 0.0); + @Data + @EqualsAndHashCode(callSuper = false) + public static class CheckModelsListener extends BaseTrainingListener { - Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.NAN_PANIC); - net.fit(new DataSet(in, lbl)); - Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.SCOPE_PANIC); + private Set> modelClasses = new HashSet<>(); + + @Override + public void iterationDone(IModel model, int iteration, int epoch) { + modelClasses.add(model.getClass()); } + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java index a12bd88f9..1a6175cde 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java @@ -29,6 +29,7 @@ import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.layers.RnnOutputLayer; import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer; @@ -67,8 +68,8 @@ public class MultiLayerTestRNN extends BaseDL4JTest { int nIn = 8; int nOut = 25; int nHiddenUnits = 17; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = + 
NeuralNetConfiguration.builder() .list().layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder() .nIn(nIn).nOut(nHiddenUnits) @@ -112,7 +113,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { int nIn = 8; int nOut = 25; int[] nHiddenUnits = {17, 19, 23}; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(17) .activation(Activation.TANH).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(17).nOut(19) @@ -160,8 +161,8 @@ public class MultiLayerTestRNN extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); int timeSeriesLength = 6; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .list().layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder() .nIn(5).nOut(7).activation(Activation.TANH) @@ -225,8 +226,8 @@ public class MultiLayerTestRNN extends BaseDL4JTest { public void testRnnTimeStepLayers() { for( int layerType=0; layerType<3; layerType++ ) { - org.deeplearning4j.nn.conf.layers.Layer l0; - org.deeplearning4j.nn.conf.layers.Layer l1; + LayerConfiguration l0; + LayerConfiguration l1; String lastActKey; if(layerType == 0){ @@ -262,7 +263,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { int timeSeriesLength = 12; //4 layer network: 2 GravesLSTM + DenseLayerConfiguration + RnnOutputLayer. Hence also tests preprocessors. - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).list() .layer(0, l0) .layer(1, l1) .layer(2, new DenseLayer.Builder().nIn(8).nOut(9).activation(Activation.TANH) @@ -349,8 +350,8 @@ public class MultiLayerTestRNN extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); int timeSeriesLength = 6; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .list().layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder() .nIn(5).nOut(7).activation(Activation.TANH) @@ -408,7 +409,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { int nIn = 5; int nOut = 4; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .trainingWorkspaceMode(WorkspaceMode.NONE).inferenceWorkspaceMode(WorkspaceMode.NONE) .list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) @@ -427,7 +428,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { .build(); assertEquals(BackpropType.Standard, conf.getBackpropType()); - MultiLayerConfiguration confTBPTT = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration confTBPTT = NeuralNetConfiguration.builder().seed(12345) .trainingWorkspaceMode(WorkspaceMode.NONE).inferenceWorkspaceMode(WorkspaceMode.NONE) .list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) @@ -443,8 +444,9 @@ public class MultiLayerTestRNN extends BaseDL4JTest { .nIn(8).nOut(nOut).activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)) .build()) - .backpropType(BackpropType.TruncatedBPTT).tBPTTBackwardLength(timeSeriesLength) - .tBPTTForwardLength(timeSeriesLength).build(); + .backpropType(BackpropType.TruncatedBPTT) + 
.tbpttBackLength(timeSeriesLength) + .tbpttFwdLength(timeSeriesLength).build(); Nd4j.getRandom().setSeed(12345); MultiLayerNetwork mln = new MultiLayerNetwork(conf); @@ -456,9 +458,9 @@ public class MultiLayerTestRNN extends BaseDL4JTest { mlnTBPTT.clearTbpttState = false; - assertEquals(BackpropType.TruncatedBPTT, mlnTBPTT.getLayerWiseConfigurations().getBackpropType()); - assertEquals(timeSeriesLength, mlnTBPTT.getLayerWiseConfigurations().getTbpttFwdLength()); - assertEquals(timeSeriesLength, mlnTBPTT.getLayerWiseConfigurations().getTbpttBackLength()); + assertEquals(BackpropType.TruncatedBPTT, mlnTBPTT.getConfiguration().getBackpropType()); + assertEquals(timeSeriesLength, mlnTBPTT.getConfiguration().getTbpttFwdLength()); + assertEquals(timeSeriesLength, mlnTBPTT.getConfiguration().getTbpttBackLength()); INDArray inputData = Nd4j.rand(miniBatchSize, nIn, timeSeriesLength); INDArray labels = Nd4j.rand(miniBatchSize, nOut, timeSeriesLength); @@ -520,8 +522,8 @@ public class MultiLayerTestRNN extends BaseDL4JTest { int nTimeSlices = 5; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345).list().layer(0, + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(12345).list().layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build()) @@ -602,7 +604,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { int nTimeSlices = 20; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) .activation(Activation.TANH) @@ -618,7 +620,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { .dist(new NormalDistribution(0, 0.5)) .build()) .backpropType(BackpropType.TruncatedBPTT) - .tBPTTBackwardLength(timeSeriesLength).tBPTTForwardLength(timeSeriesLength).build(); + .tbpttBackLength(timeSeriesLength).tbpttFwdLength(timeSeriesLength).build(); Nd4j.getRandom().setSeed(12345); MultiLayerNetwork mln = new MultiLayerNetwork(conf); @@ -639,7 +641,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { int nIn = 5; int nOut = 4; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) .activation(Activation.TANH) @@ -655,7 +657,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { .dist(new NormalDistribution(0, 0.5)) .build()) .backpropType(BackpropType.TruncatedBPTT) - .tBPTTBackwardLength(tbpttLength).tBPTTForwardLength(tbpttLength).build(); + .tbpttBackLength(tbpttLength).tbpttFwdLength(tbpttLength).build(); Nd4j.getRandom().setSeed(12345); MultiLayerNetwork mln = new MultiLayerNetwork(conf); @@ -675,8 +677,8 @@ public class MultiLayerTestRNN extends BaseDL4JTest { @Test public void testRnnTimeStepWithPreprocessor() { - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10) @@ -698,7 +700,7 @@ 
public class MultiLayerTestRNN extends BaseDL4JTest { @Test public void testRnnTimeStepWithPreprocessorGraph() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10) @@ -727,7 +729,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { int nIn = 5; int nOut = 4; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) @@ -737,7 +739,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { .layer(2, new RnnOutputLayer.Builder(LossFunction.MSE).nIn(8).nOut(nOut) .activation(Activation.IDENTITY).build()) .backpropType(BackpropType.TruncatedBPTT) - .tBPTTBackwardLength(tbpttLength).tBPTTForwardLength(tbpttLength).build(); + .tbpttBackLength(tbpttLength).tbpttFwdLength(tbpttLength).build(); Nd4j.getRandom().setSeed(12345); MultiLayerNetwork mln = new MultiLayerNetwork(conf); @@ -764,7 +766,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { int nHiddenUnits = 17; try { - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .list() .layer(new org.deeplearning4j.nn.conf.layers.LSTM.Builder().nIn(nIn).nOut(nHiddenUnits).build()) .layer(new GlobalPoolingLayer()) @@ -783,7 +785,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { @Test public void testWrapperLayerGetPreviousState(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new FrozenLayer(new org.deeplearning4j.nn.conf.layers.LSTM.Builder() .nIn(5).nOut(5).build())) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java index c4c3067a9..1cca6ede8 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java @@ -66,11 +66,12 @@ public class TestMasking extends BaseDL4JTest { public void checkMaskArrayClearance() { for (boolean tbptt : new boolean[] {true, false}) { //Simple "does it throw an exception" type test... - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).list() .layer(0, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY).nIn(1).nOut(1).build()) .backpropType(tbptt ? 
BackpropType.TruncatedBPTT : BackpropType.Standard) - .tBPTTForwardLength(8).tBPTTBackwardLength(8).build(); + + .tbpttFwdLength(8).tbpttBackLength(8).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -149,7 +150,7 @@ public class TestMasking extends BaseDL4JTest { Activation a = act[i]; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new NoOp()) .dist(new NormalDistribution(0, 1)).seed(12345) .list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) @@ -195,7 +196,7 @@ public class TestMasking extends BaseDL4JTest { //Do the same for CompGraph - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().updater(new NoOp()) + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().updater(new NoOp()) .dist(new NormalDistribution(0, 1)).seed(12345) .graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize) @@ -237,7 +238,7 @@ public class TestMasking extends BaseDL4JTest { int nIn = 5; int nOut = 4; - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().updater(new NoOp()) + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().updater(new NoOp()) .dist(new NormalDistribution(0, 1)).seed(12345) .graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) @@ -269,7 +270,7 @@ public class TestMasking extends BaseDL4JTest { int cnnStride1 = 1; int channels = 1; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same) @@ -304,7 +305,7 @@ public class TestMasking extends BaseDL4JTest { @Test public void testMaskingStackUnstack(){ - ComputationGraphConfiguration nnConfig = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration nnConfig = NeuralNetConfiguration.builder() .updater(new Adam(2e-2)) .graphBuilder() .setInputTypes( diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java index cb9536e3d..7b75bc97b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java @@ -21,7 +21,6 @@ package org.deeplearning4j.nn.multilayer; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.layers.*; @@ -40,7 +39,7 @@ public class TestSetGetParameters extends BaseDL4JTest { @Test public void testSetParameters() { //Set up a MLN, then do set(get) on parameters. Results should be identical compared to before doing this. 
- MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new DenseLayer.Builder().nIn(9).nOut(10) .dist(new NormalDistribution(0, 1)).build()) .layer(1, new DenseLayer.Builder().nIn(10).nOut(11) @@ -55,12 +54,12 @@ public class TestSetGetParameters extends BaseDL4JTest { net.init(); INDArray initParams = net.params().dup(); - Map initParams2 = net.paramTable(); + Map initParams2 = net.getParamTable(); net.setParams(net.params()); INDArray initParamsAfter = net.params(); - Map initParams2After = net.paramTable(); + Map initParams2After = net.getParamTable(); for (String s : initParams2.keySet()) { assertEquals(initParams2.get(s), initParams2After.get(s), "Params differ: " + s); @@ -79,7 +78,7 @@ public class TestSetGetParameters extends BaseDL4JTest { public void testSetParametersRNN() { //Set up a MLN, then do set(get) on parameters. Results should be identical compared to before doing this. - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new GravesLSTM.Builder().nIn(9).nOut(10) .dist(new NormalDistribution(0, 1)).build()) .layer(1, new GravesLSTM.Builder().nIn(10).nOut(11) @@ -92,12 +91,12 @@ public class TestSetGetParameters extends BaseDL4JTest { net.init(); INDArray initParams = net.params().dup(); - Map initParams2 = net.paramTable(); + Map initParams2 = net.getParamTable(); net.setParams(net.params()); INDArray initParamsAfter = net.params(); - Map initParams2After = net.paramTable(); + Map initParams2After = net.getParamTable(); for (String s : initParams2.keySet()) { assertEquals(initParams2.get(s), initParams2After.get(s), "Params differ: " + s); @@ -118,7 +117,7 @@ public class TestSetGetParameters extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); //Create configuration. 
Doesn't matter if this doesn't actually work for forward/backward pass here - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).list() .layer(0, new ConvolutionLayer.Builder().nIn(10).nOut(10).kernelSize(2, 2).stride(2, 2) .padding(2, 2).build()) .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).build()) @@ -145,9 +144,9 @@ public class TestSetGetParameters extends BaseDL4JTest { assertSame(params, net3.params()); //Same object due to clone - Map paramsMap = net.paramTable(); - Map paramsMap2 = net2.paramTable(); - Map paramsMap3 = net3.paramTable(); + Map paramsMap = net.getParamTable(); + Map paramsMap2 = net2.getParamTable(); + Map paramsMap3 = net3.getParamTable(); for (String s : paramsMap.keySet()) { assertEquals(paramsMap.get(s), paramsMap2.get(s)); assertEquals(paramsMap.get(s), paramsMap3.get(s)); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java index 5d5daed14..7dc7480c6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.multilayer; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -48,7 +47,6 @@ import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Random; @@ -72,7 +70,7 @@ public class TestVariableLengthTS extends BaseDL4JTest { for (int nExamples : miniBatchSizes) { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build()) @@ -160,7 +158,7 @@ public class TestVariableLengthTS extends BaseDL4JTest { for (int nExamples : miniBatchSizes) { Nd4j.getRandom().setSeed(1234); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build()) @@ -170,7 +168,7 @@ public class TestVariableLengthTS extends BaseDL4JTest { .nOut(1).activation(Activation.TANH).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(2, new FeedForwardToRnnPreProcessor()) - .setInputType(InputType.recurrent(2,-1, RNNFormat.NCW)) + .inputType(InputType.recurrent(2,-1, RNNFormat.NCW)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -305,8 +303,8 @@ public class TestVariableLengthTS extends BaseDL4JTest { INDArray input = Nd4j.rand(miniBatch, nIn, tsLength); INDArray labels = 
Nd4j.ones(miniBatch, nOut, tsLength); - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345L).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5) .dist(new NormalDistribution(0, 1)) @@ -368,8 +366,8 @@ public class TestVariableLengthTS extends BaseDL4JTest { INDArray input = Nd4j.rand(miniBatch, nIn, tsLength); - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345L).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5) .dist(new NormalDistribution(0, 1)) @@ -384,8 +382,8 @@ public class TestVariableLengthTS extends BaseDL4JTest { MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); - MultiLayerConfiguration conf2 = - new NeuralNetConfiguration.Builder().seed(12345L).list() + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder().seed(12345L).list() .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5) .dist(new NormalDistribution(0, 1)) @@ -440,7 +438,7 @@ public class TestVariableLengthTS extends BaseDL4JTest { int layerSize = 3; int nOut = 3; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .activation(Activation.TANH).list() .layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize).build()) .layer(1, new GravesBidirectionalLSTM.Builder().nIn(layerSize).nOut(layerSize).build()) @@ -517,7 +515,7 @@ public class TestVariableLengthTS extends BaseDL4JTest { // System.out.println("Starting test: bidirectional = " + bidirectional + ", poolingType = " + pt); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .activation(Activation.TANH).list().layer(0, bidirectional ? 
new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize).build() : new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java index 92b8375dd..fe80d1e24 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.rl; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -55,11 +54,11 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { for (boolean regularization : new boolean[] {false, true}) { for (IUpdater u : new IUpdater[] {new Sgd(0.1), new Nesterovs(0.1), new Adam(0.1)}) { - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345).activation(Activation.TANH) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(12345).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).updater(u) .l1(regularization ? 0.2 : 0.0) - .l2(regularization ? 0.3 : 0.0).list() + .l2(regularization ? 0.3 : 0.0) .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(10).build()) .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(2, new OutputLayer.Builder( @@ -125,8 +124,8 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { net2GradUpd.getUpdater().getStateViewArray()); //Remove the next 2 lines: fails - as net 1 is 1 iteration ahead - net1GradCalc.getLayerWiseConfigurations().setIterationCount(0); - net2GradUpd.getLayerWiseConfigurations().setIterationCount(0); + net1GradCalc.getConfiguration().setIterationCount(0); + net2GradUpd.getConfiguration().setIterationCount(0); for (int i = 0; i < 100; i++) { net1GradCalc.fit(f, l); @@ -148,7 +147,7 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { for (IUpdater u : new IUpdater[] {new Sgd(0.1), new Adam(0.1)}) { ComputationGraphConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345).activation(Activation.TANH) + NeuralNetConfiguration.builder().seed(12345).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).updater(u) .l1(regularization ? 0.2 : 0.0) .l2(regularization ? 
0.3 : 0.0).graphBuilder().addInputs("in") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestFrozenLayers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestFrozenLayers.java index ecda6b48a..d35b46911 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestFrozenLayers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestFrozenLayers.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.transferlearning; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -76,7 +75,7 @@ public class TestFrozenLayers extends BaseDL4JTest { } Map paramsBefore = new LinkedHashMap<>(); - for(Map.Entry entry : transfer.paramTable().entrySet()){ + for(Map.Entry entry : transfer.getParamTable().entrySet()){ paramsBefore.put(entry.getKey(), entry.getValue().dup()); } @@ -86,7 +85,7 @@ public class TestFrozenLayers extends BaseDL4JTest { transfer.fit(f,l); } - for(Map.Entry entry : transfer.paramTable().entrySet()){ + for(Map.Entry entry : transfer.getParamTable().entrySet()){ String s = msg + " - " + entry.getKey(); if(entry.getKey().startsWith("5_")){ //Non-frozen layer @@ -152,7 +151,7 @@ public class TestFrozenLayers extends BaseDL4JTest { } public static MultiLayerNetwork getOriginalNet(int seed){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(seed) .weightInit(WeightInit.XAVIER) .activation(Activation.TANH) @@ -165,7 +164,7 @@ public class TestFrozenLayers extends BaseDL4JTest { .layer(new DenseLayer.Builder().nOut(64).build()) .layer(new DenseLayer.Builder().nIn(64).nOut(64).build()) .layer(new OutputLayer.Builder().nIn(64).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) - .setInputType(InputType.convolutionalFlat(28,28,1)) + .inputType(InputType.convolutionalFlat(28,28,1)) .build(); @@ -175,7 +174,7 @@ public class TestFrozenLayers extends BaseDL4JTest { } public static ComputationGraph getOriginalGraph(int seed){ - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .seed(seed) .weightInit(WeightInit.XAVIER) .activation(Activation.TANH) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java index 44c3bcb07..b328c8dff 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java @@ -23,12 +23,11 @@ package org.deeplearning4j.nn.transferlearning; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import 
org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.graph.LayerVertex; import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.FrozenLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -53,7 +52,7 @@ public class TestTransferLearningModelSerializer extends BaseDL4JTest { int nIn = 6; int nOut = 3; - MultiLayerConfiguration origConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration origConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.TANH).dropOut(0.5).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5).build()) .layer(1, new DenseLayer.Builder().nIn(5).nOut(4).build()) @@ -71,9 +70,9 @@ public class TestTransferLearningModelSerializer extends BaseDL4JTest { assertTrue(withFrozen.getLayer(0) instanceof FrozenLayer); assertTrue(withFrozen.getLayer(1) instanceof FrozenLayer); - assertTrue(withFrozen.getLayerWiseConfigurations().getConf(0) + assertTrue(withFrozen.getConfiguration().getConf(0) .getLayer() instanceof org.deeplearning4j.nn.conf.layers.misc.FrozenLayer); - assertTrue(withFrozen.getLayerWiseConfigurations().getConf(1) + assertTrue(withFrozen.getConfiguration().getConf(1) .getLayer() instanceof org.deeplearning4j.nn.conf.layers.misc.FrozenLayer); MultiLayerNetwork restored = TestUtils.testModelSerialization(withFrozen); @@ -102,7 +101,7 @@ public class TestTransferLearningModelSerializer extends BaseDL4JTest { int nIn = 6; int nOut = 3; - ComputationGraphConfiguration origConf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).graphBuilder().addInputs("in") + ComputationGraphConfiguration origConf = NeuralNetConfiguration.builder().activation(Activation.TANH).graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(5).build(), "in") .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(4).build(), "0") .addLayer("2", new DenseLayer.Builder().nIn(4).nOut(3).build(), "1") @@ -121,8 +120,8 @@ public class TestTransferLearningModelSerializer extends BaseDL4JTest { assertTrue(withFrozen.getLayer(1) instanceof FrozenLayer); Map m = withFrozen.getComputationGraphConfiguration().getVertices(); - Layer l0 = ((LayerVertex) m.get("0")).getLayerConf().getLayer(); - Layer l1 = ((LayerVertex) m.get("1")).getLayerConf().getLayer(); + LayerConfiguration l0 = ((LayerVertex) m.get("0")).getNetConfiguration().getFirstLayer(); + LayerConfiguration l1 = ((LayerVertex) m.get("1")).getNetConfiguration().getFirstLayer(); assertTrue(l0 instanceof org.deeplearning4j.nn.conf.layers.misc.FrozenLayer); assertTrue(l1 instanceof org.deeplearning4j.nn.conf.layers.misc.FrozenLayer); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java index efc821b6e..0f75f1426 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java @@ -25,6 +25,7 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; 
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.constraint.UnitNormConstraint; import org.deeplearning4j.nn.conf.distribution.ConstantDistribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; @@ -63,7 +64,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { long rng = 12345L; DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); //original conf - ComputationGraphConfiguration confToChange = new NeuralNetConfiguration.Builder().seed(rng) + ComputationGraphConfiguration confToChange = NeuralNetConfiguration.builder().seed(rng) .optimizationAlgo(OptimizationAlgorithm.LBFGS).updater(new Nesterovs(0.01, 0.99)) .graphBuilder().addInputs("layer0In").setInputTypes(InputType.feedForward(4)) .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In") @@ -76,7 +77,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .setOutputs("layer1").build(); //conf with learning parameters changed - ComputationGraphConfiguration expectedConf = new NeuralNetConfiguration.Builder().seed(rng) + ComputationGraphConfiguration expectedConf = NeuralNetConfiguration.builder().seed(rng) .updater(new RmsProp(0.2)) .graphBuilder().addInputs("layer0In") .setInputTypes(InputType.feedForward(4)) @@ -115,7 +116,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { public void testNoutChanges() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 2)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY).build(); @@ -138,9 +139,9 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { //.setOutputs("layer3") .build(); - BaseLayer bl0 = ((BaseLayer) modelNow.getLayer("layer0").conf().getLayer()); - BaseLayer bl1 = ((BaseLayer) modelNow.getLayer("layer1").conf().getLayer()); - BaseLayer bl3 = ((BaseLayer) modelNow.getLayer("layer3").conf().getLayer()); + BaseLayer bl0 = ((BaseLayer) modelNow.getLayer("layer0").getLayerConfiguration()); + BaseLayer bl1 = ((BaseLayer) modelNow.getLayer("layer1").getLayerConfiguration()); + BaseLayer bl3 = ((BaseLayer) modelNow.getLayer("layer3").getLayerConfiguration()); assertEquals(bl0.getWeightInitFn(), new WeightInitDistribution(new NormalDistribution(1, 1e-1))); assertEquals(bl1.getWeightInitFn(), new WeightInitXavier()); assertEquals(bl1.getWeightInitFn(), new WeightInitXavier()); @@ -182,7 +183,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { public void testRemoveAndAdd() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY); FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)) .activation(Activation.IDENTITY).build(); @@ -250,7 +251,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { DataSet randomData = new 
DataSet(Nd4j.rand(10, 28 * 28 * 3).reshape(10, 3, 28, 28), Nd4j.rand(10, 10)); ComputationGraph modelToFineTune = new ComputationGraph( - new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration.builder().seed(123) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)).graphBuilder() .addInputs("layer0In") @@ -303,7 +304,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { modelToFineTune.init(); //this will override the learning configuration set in the model - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().seed(456).updater(new Sgd(0.001)); + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().seed(456).updater(new Sgd(0.001)); FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().seed(456).updater(new Sgd(0.001)) .build(); @@ -399,7 +400,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { @Test public void testTransferGlobalPool() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new Adam(0.1)) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new Adam(0.1)) .weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") .addLayer("blstm1",new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10) @@ -425,7 +426,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .nIn(10).nOut(5).build(), "dense") .build(); - ComputationGraphConfiguration confExpected = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration confExpected = NeuralNetConfiguration.builder().seed(12345) .updater(new Sgd(0.01)) .weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") @@ -452,7 +453,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { @Test public void testObjectOverrides(){ //https://github.com/deeplearning4j/deeplearning4j/issues/4368 - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .dropOut(0.5) .weightNoise(new DropConnect(0.5)) .l2(0.5) @@ -477,7 +478,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .fineTuneConfiguration(ftc) .build(); - DenseLayer l = (DenseLayer) transfer.getLayer(0).conf().getLayer(); + DenseLayer l = (DenseLayer) transfer.getLayer(0).getLayerConfiguration(); assertNull(l.getIDropout()); assertNull(l.getWeightNoise()); @@ -494,7 +495,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { final String firstConv = "firstConv"; final String secondConv = "secondConv"; final INDArray input = Nd4j.create(6,6,6,6); - final ComputationGraph graph = new ComputationGraph(new NeuralNetConfiguration.Builder() + final ComputationGraph graph = new ComputationGraph(NeuralNetConfiguration.builder() .weightInit(new ConstantDistribution(666)) .graphBuilder() .addInputs(inputName) @@ -541,7 +542,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { final String afterPoolName = "afterPool"; final String outputName = "output"; final INDArray input = Nd4j.create(new long[] {1, 2, 4, 4}); - final ComputationGraph graph = new ComputationGraph(new NeuralNetConfiguration.Builder() + final ComputationGraph graph = new ComputationGraph(NeuralNetConfiguration.builder() .graphBuilder() .addInputs(inputName) .setOutputs(outputName) @@ -578,7 +579,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { @Test public void 
testTransferLearningSameDiffLayersGraph(){ - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") @@ -624,7 +625,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { @Test public void testTransferLearningSameDiffLayersGraphVertex(){ - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java index d30227339..ba201c62a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java @@ -26,6 +26,7 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.graph.MergeVertex; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.BaseLayer; @@ -55,7 +56,7 @@ public class TransferLearningComplex extends BaseDL4JTest { // (b) Test global override (should be selective) - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Adam(1e-4)) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(new Adam(1e-4)) .activation(Activation.LEAKYRELU).graphBuilder().addInputs("in1", "in2") .addLayer("A", new DenseLayer.Builder().nIn(10).nOut(9).build(), "in1") .addLayer("B", new DenseLayer.Builder().nIn(9).nOut(8).build(), "A") @@ -87,9 +88,9 @@ public class TransferLearningComplex extends BaseDL4JTest { Layer[] layers = graph2.getLayers(); for (Layer l : layers) { - String name = l.conf().getLayer().getLayerName(); + String name = l.getLayerConfiguration().getLayerName(); log.info(name + "\t frozen: " + (l instanceof FrozenLayer)); - if ("C".equals(l.conf().getLayer().getLayerName())) { + if ("C".equals(l.getLayerConfiguration().getLayerName())) { //Only C should be frozen in this config cFound = true; assertTrue(l instanceof FrozenLayer, name); @@ -98,7 +99,7 @@ public class TransferLearningComplex extends BaseDL4JTest { } //Also check config: - BaseLayer bl = ((BaseLayer) l.conf().getLayer()); + BaseLayer bl = ((BaseLayer) l.getLayerConfiguration()); assertEquals(new Adam(2e-2), bl.getIUpdater()); assertEquals(Activation.LEAKYRELU.getActivationFunction(), bl.getActivationFn()); } @@ -109,7 +110,7 @@ public class TransferLearningComplex extends BaseDL4JTest { @Test public void testSimplerMergeBackProp() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.9)) .activation(Activation.IDENTITY) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); @@ -191,7 +192,7 @@ public class TransferLearningComplex extends BaseDL4JTest { @Test public void testLessSimpleMergeBackProp() { - 
NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.9)) .activation(Activation.IDENTITY); /* @@ -248,7 +249,7 @@ public class TransferLearningComplex extends BaseDL4JTest { @Test public void testAddOutput() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.9)) .activation(Activation.IDENTITY); ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java index d7e58be43..f606e6402 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java @@ -28,6 +28,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.MergeVertex; import org.deeplearning4j.nn.conf.graph.SubsetVertex; import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer.Builder; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -41,6 +42,7 @@ import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.List; +import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -50,7 +52,7 @@ public class TransferLearningHelperTest extends BaseDL4JTest { @Test public void tesUnfrozenSubset() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().seed(124) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().seed(124) .activation(Activation.IDENTITY) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)); /* @@ -132,7 +134,7 @@ public class TransferLearningHelperTest extends BaseDL4JTest { @Test public void testFitUnFrozen() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.9)).seed(124) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.9)).seed(124) .activation(Activation.IDENTITY) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); @@ -185,11 +187,11 @@ public class TransferLearningHelperTest extends BaseDL4JTest { assertEquals(modelIdentical.getLayer("denseCentre2").params(), modelToTune.getLayer("denseCentre2").params()); assertEquals(modelIdentical.getLayer("denseCentre3").params(), modelToTune.getLayer("denseCentre3").params()); assertEquals(modelIdentical.getLayer("outCentre").params(), modelToTune.getLayer("outCentre").params()); - assertEquals(modelIdentical.getLayer("denseRight").conf().toJson(), - modelToTune.getLayer("denseRight").conf().toJson()); + assertEquals(modelIdentical.getLayer("denseRight").getNetConfiguration().toJson(), + 
modelToTune.getLayer("denseRight").getNetConfiguration().toJson()); assertEquals(modelIdentical.getLayer("denseRight").params(), modelToTune.getLayer("denseRight").params()); - assertEquals(modelIdentical.getLayer("denseRight0").conf().toJson(), - modelToTune.getLayer("denseRight0").conf().toJson()); + assertEquals(modelIdentical.getLayer("denseRight0").getNetConfiguration().toJson(), + modelToTune.getLayer("denseRight0").getNetConfiguration().toJson()); //assertEquals(modelIdentical.getLayer("denseRight0").params(),modelToTune.getLayer("denseRight0").params()); assertEquals(modelIdentical.getLayer("denseRight1").params(), modelToTune.getLayer("denseRight1").params()); assertEquals(modelIdentical.getLayer("outRight").params(), modelToTune.getLayer("outRight").params()); @@ -206,18 +208,19 @@ public class TransferLearningHelperTest extends BaseDL4JTest { public void testMLN() { DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3)); - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .activation(Activation.IDENTITY); - MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(overallConf.clone().list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).build()) - .layer(2, new DenseLayer.Builder().nIn(2).nOut(3).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) - .build()) - .build()); + MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( + (NeuralNetConfiguration) overallConf.clone() + .layer(0, new Builder().nIn(4).nOut(3).build()) + .layer(1, new Builder().nIn(3).nOut(2).build()) + .layer(2, new Builder().nIn(2).nOut(3).build()) + .layer(3, new OutputLayer.Builder( + LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) + .build()) + .build()); modelToFineTune.init(); MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune).setFeatureExtractor(1).build(); @@ -228,12 +231,13 @@ public class TransferLearningHelperTest extends BaseDL4JTest { INDArray paramsLastTwoLayers = Nd4j.hstack(modelToFineTune.getLayer(2).params(), modelToFineTune.getLayer(3).params()); - MultiLayerNetwork notFrozen = new MultiLayerNetwork(overallConf.clone().list() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(3).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) - .build()) - .build(), paramsLastTwoLayers); + MultiLayerNetwork notFrozen = new MultiLayerNetwork( + (NeuralNetConfiguration) overallConf.clone().list() + .layer(0, new Builder().nIn(2).nOut(3).build()) + .layer(1, new OutputLayer.Builder( + LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) + .build()) + .build(), paramsLastTwoLayers); assertEquals(asFrozenFeatures, helper.featurize(randomData).getFeatures()); assertEquals(randomData.getLabels(), helper.featurize(randomData).getLabels()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java index 005f2158c..cda7da0b4 100644 --- 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java @@ -26,13 +26,13 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.BackpropType; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.constraint.UnitNormConstraint; import org.deeplearning4j.nn.conf.distribution.ConstantDistribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.DenseLayer.Builder; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor; @@ -54,6 +54,7 @@ import org.nd4j.linalg.lossfunctions.LossFunctions; import com.fasterxml.jackson.core.JsonProcessingException; import java.util.Map; +import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import static org.junit.jupiter.api.Assertions.*; @@ -67,16 +68,17 @@ public class TransferLearningMLNTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(rng); DataSet randomData = new DataSet(Nd4j.rand(DataType.FLOAT, 10, 4), TestUtils.randomOneHot(DataType.FLOAT, 10, 3)); //original conf - NeuralNetConfiguration.Builder confToChange = - new NeuralNetConfiguration.Builder().seed(rng).optimizationAlgo(OptimizationAlgorithm.LBFGS) + NeuralNetConfiguration.NeuralNetConfigurationBuilder confToChange = + (NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder().seed(rng).optimizationAlgo(OptimizationAlgorithm.LBFGS) .updater(new Nesterovs(0.01, 0.99)); - MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(confToChange.list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) - .build()) - .build()); + MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( + (NeuralNetConfiguration) confToChange.list() + .layer(0, new Builder().nIn(4).nOut(3).build()) + .layer(1, new OutputLayer.Builder( + LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) + .build()) + .build()); modelToFineTune.init(); //model after applying changes with transfer learning @@ -89,19 +91,19 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .build(); for (org.deeplearning4j.nn.api.Layer l : modelNow.getLayers()) { - BaseLayer bl = ((BaseLayer) l.conf().getLayer()); + BaseLayer bl = ((BaseLayer) l.getLayerConfiguration()); assertEquals(new RmsProp(0.5), bl.getIUpdater()); } - NeuralNetConfiguration.Builder confSet = new NeuralNetConfiguration.Builder().seed(rng) + NeuralNetConfiguration.NeuralNetConfigurationBuilder confSet = NeuralNetConfiguration.builder().seed(rng) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new RmsProp(0.5)).l2(0.4); - MultiLayerNetwork expectedModel = new MultiLayerNetwork(confSet.list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(1, new 
org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) + MultiLayerNetwork expectedModel = new MultiLayerNetwork((NeuralNetConfiguration) confSet.list() + .layer(0, new Builder().nIn(4).nOut(3).build()) + .layer(1, new OutputLayer.Builder( + LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build()); expectedModel.init(); @@ -110,8 +112,8 @@ public class TransferLearningMLNTest extends BaseDL4JTest { assertEquals(expectedModel.params(), modelNow.params()); //Check json - MultiLayerConfiguration expectedConf = expectedModel.getLayerWiseConfigurations(); - assertEquals(expectedConf.toJson(), modelNow.getLayerWiseConfigurations().toJson()); + NeuralNetConfiguration expectedConf = expectedModel.getConfiguration(); + assertEquals(expectedConf.toJson(), modelNow.getConfiguration().toJson()); //Check params after fit modelNow.fit(randomData); @@ -128,11 +130,11 @@ public class TransferLearningMLNTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); DataSet randomData = new DataSet(Nd4j.rand(DataType.FLOAT, 10, 4), TestUtils.randomOneHot(DataType.FLOAT,10, 2)); - NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)); + NeuralNetConfiguration.NeuralNetConfigurationBuilder equivalentConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)); FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)) .build(); - MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(equivalentConf.list() + MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(equivalentConf .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).build()) .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).build()) .layer(2, new DenseLayer.Builder().nIn(2).nOut(3).build()) @@ -145,7 +147,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .nOutReplace(3, 2, WeightInit.XAVIER, WeightInit.XAVIER) .nOutReplace(0, 3, WeightInit.XAVIER, new NormalDistribution(1, 1e-1)).build(); - MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(equivalentConf.list() + MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(equivalentConf .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).build()) .layer(2, new DenseLayer.Builder().nIn(2).nOut(3).build()) @@ -156,11 +158,11 @@ public class TransferLearningMLNTest extends BaseDL4JTest { modelExpectedArch.init(); //Will fail - expected because of dist and weight init changes - //assertEquals(modelExpectedArch.getLayerWiseConfigurations().toJson(), modelNow.getLayerWiseConfigurations().toJson()); + //assertEquals(modelExpectedArch.getConfiguration().toJson(), modelNow.getConfiguration().toJson()); - BaseLayer bl0 = ((BaseLayer) modelNow.getLayerWiseConfigurations().getConf(0).getLayer()); - BaseLayer bl1 = ((BaseLayer) modelNow.getLayerWiseConfigurations().getConf(1).getLayer()); - BaseLayer bl3 = ((BaseLayer) modelNow.getLayerWiseConfigurations().getConf(3).getLayer()); + BaseLayer bl0 = ((BaseLayer) modelNow.getConfiguration().getConf(0).getLayer()); + BaseLayer bl1 = ((BaseLayer) modelNow.getConfiguration().getConf(1).getLayer()); + BaseLayer bl3 = ((BaseLayer) modelNow.getConfiguration().getConf(3).getLayer()); assertEquals(bl0.getWeightInitFn().getClass(), WeightInitXavier.class); try { assertEquals(JsonMappers.getMapper().writeValueAsString(bl1.getWeightInitFn()), @@ -191,7 +193,7 @@ public class 
TransferLearningMLNTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); DataSet randomData = new DataSet(Nd4j.rand(DataType.FLOAT,10, 4), TestUtils.randomOneHot(DataType.FLOAT, 10, 3)); - NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)); + NeuralNetConfiguration.NeuralNetConfigurationBuilder equivalentConf = (NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder().updater(new Sgd(0.1)); FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(//overallConf.list() @@ -248,8 +250,8 @@ public class TransferLearningMLNTest extends BaseDL4JTest { int V_HEIGHT = 130; int V_NFRAMES = 150; - MultiLayerConfiguration confForArchitecture = - new NeuralNetConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers + NeuralNetConfiguration confForArchitecture = + NeuralNetConfiguration.builder().seed(12345).l2(0.001) //l2 regularization on all layers .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new AdaGrad(0.4)).list() .layer(0, new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB @@ -277,13 +279,13 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .inputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10)) .inputPreProcessor(4, new FeedForwardToRnnPreProcessor()) .backpropType(BackpropType.TruncatedBPTT) - .tBPTTForwardLength(V_NFRAMES / 5).tBPTTBackwardLength(V_NFRAMES / 5).build(); + .tbpttFwdLength(V_NFRAMES / 5).tbpttBackLength(V_NFRAMES / 5).build(); MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(confForArchitecture); modelExpectedArch.init(); MultiLayerNetwork modelToTweak = new MultiLayerNetwork( - new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration.builder().seed(12345) .updater(new RmsProp(0.1)) .list() .layer(0, new ConvolutionLayer.Builder(10, 10) //Only keep the first layer the same @@ -324,8 +326,8 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .inputPreProcessor(4, new FeedForwardToRnnPreProcessor()) .backpropType(BackpropType.TruncatedBPTT) - .tBPTTForwardLength(V_NFRAMES / 5) - .tBPTTBackwardLength(V_NFRAMES / 5).build()); + .tbpttFwdLength(V_NFRAMES / 5) + .tbpttBackLength(V_NFRAMES / 5).build()); modelToTweak.init(); MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToTweak) @@ -355,18 +357,18 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .setInputPreProcessor(4, new FeedForwardToRnnPreProcessor()).build(); //modelNow should have the same architecture as modelExpectedArch - assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(0).toJson(), - modelNow.getLayerWiseConfigurations().getConf(0).toJson()); + assertEquals(modelExpectedArch.getConfiguration().getConf(0).toJson(), + modelNow.getConfiguration().getConf(0).toJson()); //some learning related info the subsampling layer will not be overwritten - //assertTrue(modelExpectedArch.getLayerWiseConfigurations().getConf(1).toJson().equals(modelNow.getLayerWiseConfigurations().getConf(1).toJson())); - assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(2).toJson(), - modelNow.getLayerWiseConfigurations().getConf(2).toJson()); - assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(3).toJson(), - modelNow.getLayerWiseConfigurations().getConf(3).toJson()); - assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(4).toJson(), - 
modelNow.getLayerWiseConfigurations().getConf(4).toJson()); - assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(5).toJson(), - modelNow.getLayerWiseConfigurations().getConf(5).toJson()); + //assertTrue(modelExpectedArch.getConfiguration().getConf(1).toJson().equals(modelNow.getConfiguration().getConf(1).toJson())); + assertEquals(modelExpectedArch.getConfiguration().getConf(2).toJson(), + modelNow.getConfiguration().getConf(2).toJson()); + assertEquals(modelExpectedArch.getConfiguration().getConf(3).toJson(), + modelNow.getConfiguration().getConf(3).toJson()); + assertEquals(modelExpectedArch.getConfiguration().getConf(4).toJson(), + modelNow.getConfiguration().getConf(4).toJson()); + assertEquals(modelExpectedArch.getConfiguration().getConf(5).toJson(), + modelNow.getConfiguration().getConf(5).toJson()); assertArrayEquals(modelExpectedArch.params().shape(), modelNow.params().shape()); assertArrayEquals(modelExpectedArch.getLayer(0).params().shape(), modelNow.getLayer(0).params().shape()); @@ -386,7 +388,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { DataSet randomData = new DataSet(Nd4j.rand(DataType.FLOAT, 10, 28 * 28 * 3).reshape(10, 3, 28, 28), TestUtils.randomOneHot(DataType.FLOAT,10, 10)); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( - new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration.builder().seed(123) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) .list() @@ -413,12 +415,12 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .nOut(100) .activation(Activation.SOFTMAX) .build()) - .setInputType(InputType.convolutionalFlat(28, 28, 3)) + .inputType(InputType.convolutionalFlat(28, 28, 3)) .build()); modelToFineTune.init(); INDArray asFrozenFeatures = modelToFineTune.feedForwardToLayer(2, randomData.getFeatures(), false).get(2); //10x20x12x12 - NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.2)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder equivalentConf = (NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder().updater(new Sgd(0.2)) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.2)) @@ -444,7 +446,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .layer(5, new DenseLayer.Builder().activation(Activation.RELU).nOut(50).build()) .layer(6, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10) .activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(12, 12, 20)).build()); + .inputType(InputType.convolutionalFlat(12, 12, 20)).build()); notFrozen.init(); assertArrayEquals(modelToFineTune.getLayer(0).params().shape(), modelNow.getLayer(0).params().shape()); @@ -481,8 +483,8 @@ public class TransferLearningMLNTest extends BaseDL4JTest { public void testFineTuneOverride() { //Check that fine-tune overrides are selective - i.e., if I only specify a new LR, only the LR should be modified - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new Adam(1e-4)) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new Adam(1e-4)) .activation(Activation.TANH).weightInit(WeightInit.RELU) .l1(0.1).l2(0.2).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(5).build()).layer(1, @@ -501,13 +503,13 @@ public class TransferLearningMLNTest extends BaseDL4JTest { //Check original net 
isn't modified: - BaseLayer l0 = (BaseLayer) net.getLayer(0).conf().getLayer(); + BaseLayer l0 = (BaseLayer) net.getLayer(0).getLayerConfiguration(); assertEquals(new Adam(1e-4), l0.getIUpdater()); assertEquals(Activation.TANH.getActivationFunction(), l0.getActivationFn()); assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); assertEquals(0.1, TestUtils.getL1(l0), 1e-6); - BaseLayer l1 = (BaseLayer) net.getLayer(1).conf().getLayer(); + BaseLayer l1 = (BaseLayer) net.getLayer(1).getLayerConfiguration(); assertEquals(new Adam(1e-4), l1.getIUpdater()); assertEquals(Activation.HARDSIGMOID.getActivationFunction(), l1.getActivationFn()); assertEquals(new WeightInitRelu(), l1.getWeightInitFn()); @@ -516,19 +518,19 @@ public class TransferLearningMLNTest extends BaseDL4JTest { assertEquals(BackpropType.Standard, conf.getBackpropType()); //Check new net has only the appropriate things modified (i.e., LR) - l0 = (BaseLayer) net2.getLayer(0).conf().getLayer(); + l0 = (BaseLayer) net2.getLayer(0).getLayerConfiguration(); assertEquals(new Adam(2e-2), l0.getIUpdater()); assertEquals(Activation.TANH.getActivationFunction(), l0.getActivationFn()); assertEquals(new WeightInitRelu(), l0.getWeightInitFn()); assertEquals(0.1, TestUtils.getL1(l0), 1e-6); - l1 = (BaseLayer) net2.getLayer(1).conf().getLayer(); + l1 = (BaseLayer) net2.getLayer(1).getLayerConfiguration(); assertEquals(new Adam(2e-2), l1.getIUpdater()); assertEquals(Activation.HARDSIGMOID.getActivationFunction(), l1.getActivationFn()); assertEquals(new WeightInitRelu(), l1.getWeightInitFn()); assertEquals(0.2, TestUtils.getL2(l1), 1e-6); - assertEquals(BackpropType.TruncatedBPTT, net2.getLayerWiseConfigurations().getBackpropType()); + assertEquals(BackpropType.TruncatedBPTT, net2.getConfiguration().getBackpropType()); } @Test @@ -538,7 +540,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { DataSet randomData = new DataSet(Nd4j.rand(DataType.FLOAT,10, 28 * 28 * 3).reshape(10, 3, 28, 28), TestUtils.randomOneHot(10, 10)); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( - new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration.builder().seed(123) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) .list() @@ -554,12 +556,12 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .layer(5, new DenseLayer.Builder().activation(Activation.RELU).nOut(250).build()) .layer(6, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(100).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 3)) //See note below + .inputType(InputType.convolutionalFlat(28, 28, 3)) //See note below .build()); modelToFineTune.init(); INDArray asFrozenFeatures = modelToFineTune.feedForwardToLayer(2, randomData.getFeatures(), false).get(2); //10x20x12x12 - NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.2)); + NeuralNetConfiguration.NeuralNetConfigurationBuilder equivalentConf = (NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder().updater(new Sgd(0.2)); FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.2)).build(); MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune).fineTuneConfiguration(overallConf) @@ -610,7 +612,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { @Test public void testObjectOverrides(){ //https://github.com/deeplearning4j/deeplearning4j/issues/4368 - 
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dropOut(0.5) .weightNoise(new DropConnect(0.5)) .l2(0.5) @@ -633,7 +635,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .fineTuneConfiguration(ftc) .build(); - DenseLayer l = (DenseLayer) transfer.getLayer(0).conf().getLayer(); + DenseLayer l = (DenseLayer) transfer.getLayer(0).getLayerConfiguration(); assertNull(l.getIDropout()); assertNull(l.getWeightNoise()); @@ -645,10 +647,10 @@ public class TransferLearningMLNTest extends BaseDL4JTest { @Test public void testTransferLearningSubsequent() { final INDArray input = Nd4j.create(6,6,6,6); - final MultiLayerNetwork net = new MultiLayerNetwork(new NeuralNetConfiguration.Builder() + final MultiLayerNetwork net = new MultiLayerNetwork(NeuralNetConfiguration.builder() .weightInit(new ConstantDistribution(666)) .list() - .setInputType(InputType.inferInputTypes(input)[0]) + .inputType(InputType.inferInputTypes(input)[0]) .layer(new Convolution2D.Builder(3, 3).nOut(10).build()) .layer(new Convolution2D.Builder(1, 1).nOut(3).build()) .layer(new OutputLayer.Builder().nOut(2).lossFunction(LossFunctions.LossFunction.MSE) @@ -677,9 +679,9 @@ public class TransferLearningMLNTest extends BaseDL4JTest { @Test public void testChangeNOutNIn() { INDArray input = Nd4j.create(new long[] {1, 2, 4, 4}); - MultiLayerNetwork net = new MultiLayerNetwork(new NeuralNetConfiguration.Builder() + MultiLayerNetwork net = new MultiLayerNetwork( NeuralNetConfiguration.builder() .list() - .setInputType(InputType.inferInputTypes(input)[0]) + .inputType(InputType.inferInputTypes(input)[0]) .layer(new Convolution2D.Builder(1, 1).nOut(10).build()) .layer(new SubsamplingLayer.Builder(1,1).build()) .layer(new Convolution2D.Builder(1, 1).nOut(7).build()) @@ -703,7 +705,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { @Test public void testTransferLearningSameDiffLayers(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .activation(Activation.TANH) .updater(new Adam(0.01)) @@ -714,7 +716,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build()) .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.recurrent(4)) + .inputType(InputType.recurrent(4)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -733,8 +735,8 @@ public class TransferLearningMLNTest extends BaseDL4JTest { net2.setParam("3_W", net.getParam("3_W")); net2.setParam("3_b", net.getParam("3_b")); - Map p1 = net.paramTable(); - Map p2 = net2.paramTable(); + Map p1 = net.getParamTable(); + Map p2 = net2.getParamTable(); for(String s : p1.keySet()){ INDArray i1 = p1.get(s); INDArray i2 = p2.get(s); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java index 02616d66d..63c936b17 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java @@ -46,15 +46,15 @@ public class TestGradientNormalization extends BaseDL4JTest { public void 
testRenormalizatonPerLayer() { Nd4j.getRandom().setSeed(12345); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(new DenseLayer.Builder().nIn(10).nOut(20) .updater(new NoOp()) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + Layer layer = conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); INDArray gradArray = Nd4j.rand(1, 220).muli(10).subi(5); layer.setBackpropGradientsViewArray(gradArray); INDArray weightGrad = Shape.newShapeNoCopy(gradArray.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, 200)), @@ -92,15 +92,15 @@ public class TestGradientNormalization extends BaseDL4JTest { public void testRenormalizationPerParamType() { Nd4j.getRandom().setSeed(12345); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(new DenseLayer.Builder().nIn(10).nOut(20) .updater(new NoOp()) .gradientNormalization(GradientNormalization.RenormalizeL2PerParamType).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + Layer layer = conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(Nd4j.create(params.shape())); Updater updater = UpdaterCreator.getUpdater(layer); INDArray weightGrad = Nd4j.rand(10, 20); @@ -125,15 +125,15 @@ public class TestGradientNormalization extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); double threshold = 3; - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer( + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer( new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp()) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(threshold).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + Layer layer = conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); INDArray gradArray = Nd4j.rand(1, 220).muli(10).subi(5); layer.setBackpropGradientsViewArray(gradArray); INDArray weightGrad = Shape.newShapeNoCopy(gradArray.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, 200)), @@ -181,15 +181,15 @@ public class TestGradientNormalization extends BaseDL4JTest { //t=0: small -> no clipping //t=1: large -> clipping - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer( + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer( new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp()) .gradientNormalization(GradientNormalization.ClipL2PerLayer) .gradientNormalizationThreshold(threshold).build()) .build(); - val numParams = conf.getLayer().initializer().numParams(conf); + val 
numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + Layer layer = conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); INDArray gradArray = Nd4j.rand(1, 220).muli(t == 0 ? 0.05 : 10).subi(t == 0 ? 0 : 5); layer.setBackpropGradientsViewArray(gradArray); INDArray weightGrad = @@ -236,15 +236,15 @@ public class TestGradientNormalization extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); double threshold = 3; - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer( + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer( new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp()) .gradientNormalization(GradientNormalization.ClipL2PerParamType) .gradientNormalizationThreshold(threshold).build()) .build(); - val numParams = conf.getLayer().initializer().numParams(conf); + val numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + Layer layer = conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(Nd4j.create(params.shape())); Updater updater = UpdaterCreator.getUpdater(layer); INDArray weightGrad = Nd4j.rand(10, 20).muli(0.05); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java index 462143897..cf73bb012 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java @@ -26,7 +26,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder; @@ -89,15 +88,15 @@ public class TestUpdaters extends BaseDL4JTest { double rho = 0.85; - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) .updater(new AdaDelta(rho, Nd4j.EPS_THRESHOLD)) .build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - BaseLayer layer = (BaseLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); @@ -157,13 +156,13 @@ public class TestUpdaters extends BaseDL4JTest { double epsilon = AdaGrad.DEFAULT_ADAGRAD_EPSILON; NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new AdaGrad(lr)) + NeuralNetConfiguration.builder().updater(new AdaGrad(lr)) .layer(new 
DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - BaseLayer layer = (BaseLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); @@ -201,13 +200,13 @@ public class TestUpdaters extends BaseDL4JTest { double epsilon = Adam.DEFAULT_ADAM_EPSILON; - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Adam(lr, beta1, beta2, Adam.DEFAULT_ADAM_EPSILON)) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Adam(lr, beta1, beta2, Adam.DEFAULT_ADAM_EPSILON)) .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - BaseLayer layer = (BaseLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); @@ -261,16 +260,16 @@ public class TestUpdaters extends BaseDL4JTest { double epsilon = Nadam.DEFAULT_NADAM_EPSILON; NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration.builder() .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) .updater(Nadam.builder().learningRate(lr).beta1(beta1) .beta2(beta2).epsilon(epsilon).build()) .build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - BaseLayer layer = (BaseLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); @@ -353,14 +352,14 @@ public class TestUpdaters extends BaseDL4JTest { double beta2 = 0.888; double epsilon = AdaMax.DEFAULT_ADAMAX_EPSILON; - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new AdaMax(lr, beta1, beta2, AdaMax.DEFAULT_ADAMAX_EPSILON)) .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - BaseLayer layer = (BaseLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); int updaterStateSize = (int) 
layer.layerConf().getIUpdater().stateSize(numParams); @@ -410,13 +409,13 @@ public class TestUpdaters extends BaseDL4JTest { double mu = 0.6; NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new Nesterovs(lr, mu)) + NeuralNetConfiguration.builder().updater(new Nesterovs(lr, mu)) .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - BaseLayer layer = (BaseLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); @@ -457,13 +456,13 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new RmsProp(lr,rmsDecay, RmsProp.DEFAULT_RMSPROP_EPSILON)) + NeuralNetConfiguration.builder().updater(new RmsProp(lr,rmsDecay, RmsProp.DEFAULT_RMSPROP_EPSILON)) .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - BaseLayer layer = (BaseLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); int updaterStateSize = (int) layer.layerConf().getIUpdater().stateSize(numParams); @@ -504,13 +503,13 @@ public class TestUpdaters extends BaseDL4JTest { double lr = 0.05; NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new Sgd(lr)) + NeuralNetConfiguration.builder().updater(new Sgd(lr)) .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - BaseLayer layer = (BaseLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); @@ -538,13 +537,13 @@ public class TestUpdaters extends BaseDL4JTest { double lr = 0.5; NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration.builder().updater(new NoOp()) .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + Layer layer = conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); @@ -574,7 +573,7 @@ public class 
TestUpdaters extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345L); double lr = 0.03; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).updater(new Sgd(lr)).build()) .layer(1, new DenseLayer.Builder().nIn(5).nOut(6) .updater(new NoOp()).build()) @@ -675,7 +674,7 @@ public class TestUpdaters extends BaseDL4JTest { int nIn = 4; int nOut = 8; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(lr,0.6)).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Nesterovs(lr,0.6)).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5) .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) .layer(1, new DenseLayer.Builder().nIn(5).nOut(6) @@ -706,7 +705,7 @@ public class TestUpdaters extends BaseDL4JTest { int nIn = 4; int nOut = 8; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(lr,0.6)).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Nesterovs(lr,0.6)).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5) .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) .layer(1, new DenseLayer.Builder().nIn(5).nOut(6) @@ -743,14 +742,14 @@ public class TestUpdaters extends BaseDL4JTest { gradient.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Sgd(lr)).seed(42) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(lr)).seed(42) .layer(new AutoEncoder.Builder() .lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY) .activation(Activation.IDENTITY).nIn(nIn).nOut(nOut).build()) .build(); - long numParams = conf.getLayer().initializer().numParams(conf); + long numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - BaseLayer layer = (BaseLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + BaseLayer layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); Updater updater = UpdaterCreator.getUpdater(layer); @@ -795,7 +794,7 @@ public class TestUpdaters extends BaseDL4JTest { gradientCopyPreUpdate.setFlattenedGradient(g); params = Nd4j.create(1, numParams); - layer = (BaseLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + layer = (BaseLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); layer.setBackpropGradientsViewArray(gradients); updater = UpdaterCreator.getUpdater(layer); assertEquals(lr, ((Sgd)layer.layerConf().getIUpdater()).getLearningRate(), 1e-4); @@ -807,7 +806,7 @@ public class TestUpdaters extends BaseDL4JTest { List blocks; if (i == 0) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).name("l0") .updater(new Adam(0.5)).build()) .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).name("l1") @@ -827,7 +826,7 @@ public class TestUpdaters extends BaseDL4JTest { MultiLayerUpdater u = (MultiLayerUpdater) net.getUpdater(); blocks = u.getUpdaterBlocks(); } else { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + 
ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder().addInputs("in") .addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10) .updater(new Adam(0.5)).build(), "in") @@ -940,8 +939,8 @@ public class TestUpdaters extends BaseDL4JTest { public void testUpdaterBlockVae() { List blocks; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new Adam(0.5)).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new Adam(0.5)).list() .layer(0, new VariationalAutoencoder.Builder().nIn(8).nOut(12) .encoderLayerSizes(10, 11).decoderLayerSizes(13, 14).build()) .build(); @@ -981,7 +980,7 @@ public class TestUpdaters extends BaseDL4JTest { public void testDivisionByMinibatch1(){ //No batch norm - should be single INDArray equal to flattened gradient view - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) @@ -1008,7 +1007,7 @@ public class TestUpdaters extends BaseDL4JTest { //With batch norm - should be multiple 'division by minibatch' array segments //i.e., exclude batch norm mean/variance - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new DenseLayer.Builder().nIn(10).nOut(9).build()) .layer(new BatchNormalization.Builder().nOut(9).build()) @@ -1059,7 +1058,7 @@ public class TestUpdaters extends BaseDL4JTest { //With batch norm - should be multiple 'division by minibatch' array segments //i.e., exclude batch norm mean/variance - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new BatchNormalization.Builder().nOut(6).build()) .layer(new ConvolutionLayer.Builder().nIn(6).nOut(5).kernelSize(2,2).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java index 170c6bdc1..e5caf981f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java @@ -21,7 +21,6 @@ package org.deeplearning4j.nn.updater.custom; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -47,7 +46,7 @@ public class TestCustomUpdater extends BaseDL4JTest { double lr = 0.03; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder().seed(12345) .activation(Activation.TANH).updater(new CustomIUpdater(lr)) //Specify custom IUpdater .list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new OutputLayer.Builder().nIn(10).nOut(10) @@ -55,7 +54,7 @@ public class TestCustomUpdater extends BaseDL4JTest { .build(); Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf2 = 
NeuralNetConfiguration.builder().seed(12345) .activation(Activation.TANH).updater(new Sgd(lr)).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder() .nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) @@ -80,7 +79,7 @@ public class TestCustomUpdater extends BaseDL4JTest { //Second: check JSON String asJson = conf1.toJson(); - MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(asJson); + NeuralNetConfiguration fromJson = NeuralNetConfiguration.fromJson(asJson); assertEquals(conf1, fromJson); Nd4j.getRandom().setSeed(12345); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java index 8b9b35e4f..b5becc819 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java @@ -48,7 +48,7 @@ public class WeightInitIdentityTest extends BaseDL4JTest { final String inputName = "input"; final String conv = "conv"; final String output = "output"; - final ComputationGraph graph = new ComputationGraph(new NeuralNetConfiguration.Builder() + final ComputationGraph graph = new ComputationGraph(NeuralNetConfiguration.builder() .graphBuilder() .addInputs(inputName) .setOutputs(output) @@ -76,7 +76,7 @@ public class WeightInitIdentityTest extends BaseDL4JTest { final String inputName = "input"; final String conv = "conv"; final String output = "output"; - final ComputationGraph graph = new ComputationGraph(new NeuralNetConfiguration.Builder() + final ComputationGraph graph = new ComputationGraph(NeuralNetConfiguration.builder() .graphBuilder() .setInputTypes(InputType.inferInputType(input)) .addInputs(inputName) @@ -103,7 +103,7 @@ public class WeightInitIdentityTest extends BaseDL4JTest { final String inputName = "input"; final String conv = "conv"; final String output = "output"; - final ComputationGraph graph = new ComputationGraph(new NeuralNetConfiguration.Builder() + final ComputationGraph graph = new ComputationGraph(NeuralNetConfiguration.builder() .graphBuilder() .setInputTypes(InputType.inferInputType(input)) .addInputs(inputName) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java index 8b73c10ee..692f0f44f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java @@ -24,7 +24,6 @@ import lombok.val; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.layers.OutputLayer; @@ -127,7 +126,7 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { double step = lineSearch.optimize(layer.params(), layer.gradient().gradient(), layer.gradient().gradient(), LayerWorkspaceMgr.noWorkspacesImmutable()); INDArray currParams = layer.params(); sf.step(currParams, origGradient, step); - 
layer.setParams(currParams); + layer.setParamsTable(currParams); layer.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); score2 = layer.score(); @@ -157,7 +156,7 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { INDArray currParams = layer.params(); sf.step(currParams, origGradient, step); - layer.setParams(currParams); + layer.setParamsTable(currParams); layer.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); score2 = layer.score(); @@ -167,16 +166,16 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { private static OutputLayer getIrisLogisticLayerConfig(Activation activationFunction, int maxIterations, LossFunctions.LossFunction lossFunction) { NeuralNetConfiguration conf = - new NeuralNetConfiguration.Builder().seed(12345L).miniBatch(true) + NeuralNetConfiguration.builder().seed(12345L).miniBatch(true) .maxNumLineSearchIterations(maxIterations) .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(lossFunction) .nIn(4).nOut(3).activation(activationFunction) .weightInit(WeightInit.XAVIER).build()) .build(); - val numParams = conf.getLayer().initializer().numParams(conf); + val numParams = conf.getFirstLayer().initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); - return (OutputLayer) conf.getLayer().instantiate(conf, null, 0, params, true, params.dataType()); + return (OutputLayer) conf.getFirstLayer().instantiate(conf, null, 0, params, true, params.dataType()); } /////////////////////////////////////////////////////////////////////////// @@ -239,8 +238,8 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { } - private static MultiLayerConfiguration getIrisMultiLayerConfig(Activation activationFunction, OptimizationAlgorithm optimizer) { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().optimizationAlgo(optimizer) + private static NeuralNetConfiguration getIrisMultiLayerConfig(Activation activationFunction, OptimizationAlgorithm optimizer) { + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().optimizationAlgo(optimizer) .updater(new Adam(0.01)).seed(12345L).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) .activation(activationFunction).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java index 73e1a7a56..7753fae33 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java @@ -21,13 +21,14 @@ package org.deeplearning4j.optimize.solver; import lombok.val; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.*; import org.deeplearning4j.nn.conf.CacheMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -134,8 +135,8 @@ public class TestOptimizers extends BaseDL4JTest { } } - private static MultiLayerConfiguration getMLPConfigIris(OptimizationAlgorithm oa) { - 
MultiLayerConfiguration c = new NeuralNetConfiguration.Builder().optimizationAlgo(oa) + private static NeuralNetConfiguration getMLPConfigIris(OptimizationAlgorithm oa) { + NeuralNetConfiguration c = NeuralNetConfiguration.builder().optimizationAlgo(oa) .updater(new AdaGrad(1e-1)).seed(12345L) .list().layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) @@ -206,15 +207,15 @@ public class TestOptimizers extends BaseDL4JTest { System.out.println("---------\n Alg= " + oa + ", nIter= " + numLineSearchIter + ", nDimensions= " + nDimensions); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().maxNumLineSearchIterations(numLineSearchIter) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().maxNumLineSearchIterations(numLineSearchIter) .updater(new Sgd(1e-2)) .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build(); - conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here + conf.addNetWideVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here Random rng = new DefaultRandom(12345L); org.nd4j.linalg.api.rng.distribution.Distribution dist = new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10); - Model m = new SphereFunctionModel(nDimensions, dist, conf); + IModel m = new SphereFunctionModel(nDimensions, dist, conf); m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); double scoreBefore = m.score(); assertTrue(!Double.isNaN(scoreBefore) && !Double.isInfinite(scoreBefore)); @@ -246,7 +247,7 @@ public class TestOptimizers extends BaseDL4JTest { assertTrue( scoreAfter < scoreBefore, "Score did not improve after optimization (b= " + scoreBefore + " ,a= " + scoreAfter + ")"); } - private static ConvexOptimizer getOptimizer(OptimizationAlgorithm oa, NeuralNetConfiguration conf, Model m) { + private static ConvexOptimizer getOptimizer(OptimizationAlgorithm oa, NeuralNetConfiguration conf, IModel m) { switch (oa) { case STOCHASTIC_GRADIENT_DESCENT: return new StochasticGradientDescent(conf, new NegativeDefaultStepFunction(), null, m); @@ -269,12 +270,12 @@ public class TestOptimizers extends BaseDL4JTest { Random rng = new DefaultRandom(12345L); org.nd4j.linalg.api.rng.distribution.Distribution dist = new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10); - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .maxNumLineSearchIterations(maxNumLineSearchIter).updater(new Sgd(0.1)) .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build(); - conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here + conf.addNetWideVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here - Model m = new SphereFunctionModel(100, dist, conf); + IModel m = new SphereFunctionModel(100, dist, conf); if (i == 0) { m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); scores[0] = m.score(); //Before optimization @@ -404,13 +405,13 @@ public class TestOptimizers extends BaseDL4JTest { double[] scores = new double[nOptIter + 1]; for (int i = 0; i <= nOptIter; i++) { - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .maxNumLineSearchIterations(maxNumLineSearchIter).miniBatch(false) .updater(new AdaGrad(1e-2)) .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build(); - conf.addVariable("W"); 
//Normally done by ParamInitializers, but obviously that isn't done here + conf.addNetWideVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here - Model m = new RastriginFunctionModel(10, conf); + IModel m = new RastriginFunctionModel(10, conf); int nParams = (int)m.numParams(); if (i == 0) { m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); @@ -587,15 +588,15 @@ public class TestOptimizers extends BaseDL4JTest { double[] scores = new double[nOptIter + 1]; for (int i = 0; i <= nOptIter; i++) { - NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .maxNumLineSearchIterations(maxNumLineSearchIter) .updater(new Sgd(1e-1)) .stepFunction(new org.deeplearning4j.nn.conf.stepfunctions.NegativeDefaultStepFunction()) .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()) .build(); - conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here + conf.addNetWideVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here - Model m = new RosenbrockFunctionModel(100, conf); + IModel m = new RosenbrockFunctionModel(100, conf); if (i == 0) { m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces()); scores[0] = m.score(); //Before optimization @@ -768,7 +769,7 @@ public class TestOptimizers extends BaseDL4JTest { * methods here. Classes extending this model for optimizer tests need only implement the score() and * gradient() methods. */ - private static abstract class SimpleOptimizableModel implements Model, Layer { + private static abstract class SimpleOptimizableModel implements IModel, Layer { private static final long serialVersionUID = 4409380971404019303L; protected INDArray parameters; protected INDArray gradientView; @@ -784,6 +785,16 @@ public class TestOptimizers extends BaseDL4JTest { this.conf = conf; } + /** + * Return the configuration of this layer + * + * @return the configuration + */ + @Override + public LayerConfiguration getLayerConfiguration() { + return this.conf.getFirstLayer(); + } + @Override public void addListeners(TrainingListener... 
listener) { // no-op @@ -791,7 +802,7 @@ public class TestOptimizers extends BaseDL4JTest { @Override public TrainingConfig getConfig() { - return conf.getLayer(); + return conf.getFirstLayer(); } /** @@ -896,12 +907,12 @@ public class TestOptimizers extends BaseDL4JTest { } @Override - public NeuralNetConfiguration conf() { + public NeuralNetConfiguration getNetConfiguration() { return conf; } @Override - public void setConf(NeuralNetConfiguration conf) { + public void setLayerConfiguration(NeuralNetConfiguration layerConfiguration) { throw new UnsupportedOperationException(); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java index 4c3760d95..6f422fda1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java @@ -22,7 +22,6 @@ package org.deeplearning4j.optimizer.listener; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -51,7 +50,7 @@ public class TestCheckpointListener extends BaseDL4JTest { public File tempDir; private static Pair getNetAndData(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java index 81786baa7..a1933c247 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java @@ -22,7 +22,6 @@ package org.deeplearning4j.optimizer.listener; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -51,7 +50,7 @@ public class TestFailureListener extends BaseDL4JTest { @Test public void testFailureIter5() throws Exception { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1e-4)) .list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -73,7 +72,7 @@ public class TestFailureListener extends BaseDL4JTest { @Test public void testFailureRandom_OR(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1e-4)) .list() .layer(0, new 
OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -101,7 +100,7 @@ public class TestFailureListener extends BaseDL4JTest { @Test public void testFailureRandom_AND() throws Exception { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1e-4)) .list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java index 55b1d39c8..b335d43a6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java @@ -22,14 +22,13 @@ package org.deeplearning4j.optimizer.listener; import lombok.Data; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.core.storage.StatsStorageRouter; import org.deeplearning4j.core.storage.listener.RoutingIterationListener; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.AutoEncoder; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -71,7 +70,7 @@ public class TestListeners extends BaseDL4JTest { public void testSettingListenersUnsupervised() { //Pretrain layers should get copies of the listeners, in addition to the - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new AutoEncoder.Builder().nIn(10).nOut(10).build()) .layer(1, new VariationalAutoencoder.Builder().nIn(10).nOut(10).build()).build(); @@ -95,7 +94,7 @@ public class TestListeners extends BaseDL4JTest { assertTrue(lArr[1] instanceof TestRoutingListener); - ComputationGraphConfiguration gConf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration gConf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("0", new AutoEncoder.Builder().nIn(10).nOut(10).build(), "in") .addLayer("1", new VariationalAutoencoder.Builder().nIn(10).nOut(10).build(), "0") .setOutputs("1").build(); @@ -151,7 +150,7 @@ public class TestListeners extends BaseDL4JTest { } @Override - public void iterationDone(Model model, int iteration, int epoch) {} + public void iterationDone(IModel model, int iteration, int epoch) {} } @@ -172,7 +171,7 @@ public class TestListeners extends BaseDL4JTest { DataSetIterator iter = new IrisDataSetIterator(10, 150); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new OutputLayer.Builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) @@ -208,7 +207,7 @@ public class TestListeners extends BaseDL4JTest { @Test public void testListenerCalls(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + 
NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -284,37 +283,37 @@ public class TestListeners extends BaseDL4JTest { @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { calls.add(new Triple<>(Call.ITER_DONE, iteration, epoch)); } @Override - public void onEpochStart(Model model) { + public void onEpochStart(IModel model) { calls.add(new Triple<>(Call.EPOCH_START, BaseOptimizer.getIterationCount(model), BaseOptimizer.getEpochCount(model))); } @Override - public void onEpochEnd(Model model) { + public void onEpochEnd(IModel model) { calls.add(new Triple<>(Call.EPOCH_END, BaseOptimizer.getIterationCount(model), BaseOptimizer.getEpochCount(model))); } @Override - public void onForwardPass(Model model, List activations) { + public void onForwardPass(IModel model, List activations) { calls.add(new Triple<>(Call.ON_FWD, BaseOptimizer.getIterationCount(model), BaseOptimizer.getEpochCount(model))); } @Override - public void onForwardPass(Model model, Map activations) { + public void onForwardPass(IModel model, Map activations) { calls.add(new Triple<>(Call.ON_FWD, BaseOptimizer.getIterationCount(model), BaseOptimizer.getEpochCount(model))); } @Override - public void onGradientCalculation(Model model) { + public void onGradientCalculation(IModel model) { calls.add(new Triple<>(Call.ON_GRAD, BaseOptimizer.getIterationCount(model), BaseOptimizer.getEpochCount(model))); } @Override - public void onBackwardPass(Model model) { + public void onBackwardPass(IModel model) { calls.add(new Triple<>(Call.ON_BWD, BaseOptimizer.getIterationCount(model), BaseOptimizer.getEpochCount(model))); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java index 97a1cb799..114d90887 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java @@ -20,9 +20,8 @@ package org.deeplearning4j.parallelism; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.api.Model; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -52,13 +51,13 @@ public class RandomTests extends BaseDL4JTest { */ @Test public void testModelInitialParamsEquality1() throws Exception { - final List models = new CopyOnWriteArrayList<>(); + final List models = new CopyOnWriteArrayList<>(); for (int i = 0; i < 4; i++) { Thread thread = new Thread(new Runnable() { @Override public void run() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(119) // Training iterations as above + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(119) // Training iterations as above .l2(0.0005) //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) .weightInit(WeightInit.XAVIER) @@ -78,7 +77,7 @@ public class RandomTests extends BaseDL4JTest { .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) .layer(5, new 
OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(10).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)) //See note below + .inputType(InputType.convolutionalFlat(28, 28, 1)) //See note below .build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); @@ -104,7 +103,7 @@ public class RandomTests extends BaseDL4JTest { public void testRngInitMLN() { Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).activation(Activation.TANH) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(2, @@ -122,7 +121,7 @@ public class RandomTests extends BaseDL4JTest { assertEquals(net1.params(), net2.params()); - MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration fromJson = NeuralNetConfiguration.fromJson(json); Nd4j.getRandom().setSeed(987654321); MultiLayerNetwork net3 = new MultiLayerNetwork(fromJson); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java index 02e089090..c52f4943f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java @@ -24,7 +24,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.core.listener.SystemInfoFilePrintListener; import org.deeplearning4j.core.listener.SystemInfoPrintListener; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -57,7 +56,7 @@ public class TestSystemInfoPrintListener extends BaseDL4JTest { .build(); tmpFile.deleteOnExit(); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/MiscRegressionTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/MiscRegressionTests.java index 686501ff8..316ad2f46 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/MiscRegressionTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/MiscRegressionTests.java @@ -23,12 +23,11 @@ package org.deeplearning4j.regressiontest; import org.apache.commons.io.FileUtils; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.graph.LayerVertex; import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import 
org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer; import org.junit.jupiter.api.Test; import org.nd4j.common.io.ClassPathResource; @@ -55,7 +54,7 @@ public class MiscRegressionTests extends BaseDL4JTest { assertNotNull(gv); if(gv instanceof LayerVertex){ LayerVertex lv = (LayerVertex)gv; - Layer layer = lv.getLayerConf().getLayer(); + LayerConfiguration layer = lv.getNetConfiguration().getFirstLayer(); if(layer instanceof FrozenLayer) countFrozen++; } @@ -66,13 +65,13 @@ public class MiscRegressionTests extends BaseDL4JTest { @Test public void testFrozenNewFormat(){ - MultiLayerConfiguration configuration = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration configuration = NeuralNetConfiguration.builder() .list() .layer(0, new FrozenLayer(new DenseLayer.Builder().nIn(10).nOut(10).build())) .build(); String json = configuration.toJson(); - MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration fromJson = NeuralNetConfiguration.fromJson(json); assertEquals(configuration, fromJson); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java index 022545685..50c177332 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java @@ -23,7 +23,7 @@ package org.deeplearning4j.regressiontest; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.layers.*; @@ -65,8 +65,8 @@ public class RegressionTest050 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(2, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(2, conf.getNetConfigurations().size()); DenseLayer l0 = (DenseLayer) conf.getConf(0).getLayer(); assertEquals("relu", l0.getActivationFn().toString()); @@ -99,8 +99,8 @@ public class RegressionTest050 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(2, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(2, conf.getNetConfigurations().size()); DenseLayer l0 = (DenseLayer) conf.getConf(0).getLayer(); assertTrue(l0.getActivationFn() instanceof ActivationLReLU); @@ -138,8 +138,8 @@ public class RegressionTest050 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(3, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(3, conf.getNetConfigurations().size()); ConvolutionLayer l0 = (ConvolutionLayer) conf.getConf(0).getLayer(); assertEquals("tanh", l0.getActivationFn().toString()); diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java index 87a53e54a..9b0870b0f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java @@ -25,7 +25,7 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.graph.LayerVertex; @@ -67,8 +67,8 @@ public class RegressionTest060 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(2, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(2, conf.getNetConfigurations().size()); DenseLayer l0 = (DenseLayer) conf.getConf(0).getLayer(); assertEquals("relu", l0.getActivationFn().toString()); @@ -101,8 +101,8 @@ public class RegressionTest060 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(2, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(2, conf.getNetConfigurations().size()); DenseLayer l0 = (DenseLayer) conf.getConf(0).getLayer(); assertTrue(l0.getActivationFn() instanceof ActivationLReLU); @@ -144,8 +144,8 @@ public class RegressionTest060 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(3, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(3, conf.getNetConfigurations().size()); ConvolutionLayer l0 = (ConvolutionLayer) conf.getConf(0).getLayer(); assertEquals("tanh", l0.getActivationFn().toString()); @@ -190,8 +190,8 @@ public class RegressionTest060 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(3, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(3, conf.getNetConfigurations().size()); GravesLSTM l0 = (GravesLSTM) conf.getConf(0).getLayer(); assertEquals("tanh", l0.getActivationFn().toString()); @@ -224,7 +224,7 @@ public class RegressionTest060 extends BaseDL4JTest { ComputationGraphConfiguration conf = net.getComputationGraphConfiguration(); assertEquals(3, conf.getVertices().size()); - GravesLSTM l0 = (GravesLSTM) ((LayerVertex) conf.getVertices().get("0")).getLayerConf().getLayer(); + GravesLSTM l0 = (GravesLSTM) ((LayerVertex) conf.getVertices().get("0")).getNetConfiguration().getFirstLayer(); assertEquals("tanh", l0.getActivationFn().toString()); assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); @@ -232,14 +232,14 @@ public class RegressionTest060 extends BaseDL4JTest { assertEquals(1.5, 
l0.getGradientNormalizationThreshold(), 1e-5); GravesBidirectionalLSTM l1 = - (GravesBidirectionalLSTM) ((LayerVertex) conf.getVertices().get("1")).getLayerConf().getLayer(); + (GravesBidirectionalLSTM) ((LayerVertex) conf.getVertices().get("1")).getNetConfiguration().getFirstLayer(); assertEquals("softsign", l1.getActivationFn().toString()); assertEquals(4, l1.getNIn()); assertEquals(4, l1.getNOut()); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization()); assertEquals(1.5, l1.getGradientNormalizationThreshold(), 1e-5); - RnnOutputLayer l2 = (RnnOutputLayer) ((LayerVertex) conf.getVertices().get("2")).getLayerConf().getLayer(); + RnnOutputLayer l2 = (RnnOutputLayer) ((LayerVertex) conf.getVertices().get("2")).getNetConfiguration().getFirstLayer(); assertEquals(4, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals("softmax", l2.getActivationFn().toString()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java index 0dc3839bb..e21f75680 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java @@ -25,7 +25,7 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.graph.LayerVertex; @@ -68,8 +68,8 @@ public class RegressionTest071 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(2, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(2, conf.getNetConfigurations().size()); DenseLayer l0 = (DenseLayer) conf.getConf(0).getLayer(); assertEquals("relu", l0.getActivationFn().toString()); @@ -102,8 +102,8 @@ public class RegressionTest071 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(2, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(2, conf.getNetConfigurations().size()); DenseLayer l0 = (DenseLayer) conf.getConf(0).getLayer(); assertTrue(l0.getActivationFn() instanceof ActivationLReLU); @@ -145,8 +145,8 @@ public class RegressionTest071 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(3, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(3, conf.getNetConfigurations().size()); ConvolutionLayer l0 = (ConvolutionLayer) conf.getConf(0).getLayer(); assertEquals("tanh", l0.getActivationFn().toString()); @@ -191,8 +191,8 @@ public class RegressionTest071 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = 
net.getLayerWiseConfigurations(); - assertEquals(3, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(3, conf.getNetConfigurations().size()); GravesLSTM l0 = (GravesLSTM) conf.getConf(0).getLayer(); assertEquals("tanh", l0.getActivationFn().toString()); @@ -224,7 +224,7 @@ public class RegressionTest071 extends BaseDL4JTest { ComputationGraphConfiguration conf = net.getComputationGraphConfiguration(); assertEquals(3, conf.getVertices().size()); - GravesLSTM l0 = (GravesLSTM) ((LayerVertex) conf.getVertices().get("0")).getLayerConf().getLayer(); + GravesLSTM l0 = (GravesLSTM) ((LayerVertex) conf.getVertices().get("0")).getNetConfiguration().getFirstLayer(); assertEquals("tanh", l0.getActivationFn().toString()); assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); @@ -232,14 +232,14 @@ public class RegressionTest071 extends BaseDL4JTest { assertEquals(1.5, l0.getGradientNormalizationThreshold(), 1e-5); GravesBidirectionalLSTM l1 = - (GravesBidirectionalLSTM) ((LayerVertex) conf.getVertices().get("1")).getLayerConf().getLayer(); + (GravesBidirectionalLSTM) ((LayerVertex) conf.getVertices().get("1")).getNetConfiguration().getFirstLayer(); assertEquals("softsign", l1.getActivationFn().toString()); assertEquals(4, l1.getNIn()); assertEquals(4, l1.getNOut()); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization()); assertEquals(1.5, l1.getGradientNormalizationThreshold(), 1e-5); - RnnOutputLayer l2 = (RnnOutputLayer) ((LayerVertex) conf.getVertices().get("2")).getLayerConf().getLayer(); + RnnOutputLayer l2 = (RnnOutputLayer) ((LayerVertex) conf.getVertices().get("2")).getNetConfiguration().getFirstLayer(); assertEquals(4, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals("softmax", l2.getActivationFn().toString()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java index 6460582ba..06af06ff4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java @@ -25,7 +25,7 @@ import org.deeplearning4j.TestUtils; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.graph.LayerVertex; @@ -67,8 +67,8 @@ public class RegressionTest080 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(2, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(2, conf.getNetConfigurations().size()); DenseLayer l0 = (DenseLayer) conf.getConf(0).getLayer(); assertTrue(l0.getActivationFn() instanceof ActivationReLU); @@ -106,8 +106,8 @@ public class RegressionTest080 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(2, conf.getConfs().size()); + 
NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(2, conf.getNetConfigurations().size()); DenseLayer l0 = (DenseLayer) conf.getConf(0).getLayer(); assertTrue(l0.getActivationFn() instanceof ActivationLReLU); @@ -155,8 +155,8 @@ public class RegressionTest080 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(3, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(3, conf.getNetConfigurations().size()); ConvolutionLayer l0 = (ConvolutionLayer) conf.getConf(0).getLayer(); assertTrue(l0.getActivationFn() instanceof ActivationTanH); @@ -206,8 +206,8 @@ public class RegressionTest080 extends BaseDL4JTest { MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true); - MultiLayerConfiguration conf = net.getLayerWiseConfigurations(); - assertEquals(3, conf.getConfs().size()); + NeuralNetConfiguration conf = net.getConfiguration(); + assertEquals(3, conf.getNetConfigurations().size()); GravesLSTM l0 = (GravesLSTM) conf.getConf(0).getLayer(); assertTrue(l0.getActivationFn() instanceof ActivationTanH); @@ -240,7 +240,7 @@ public class RegressionTest080 extends BaseDL4JTest { ComputationGraphConfiguration conf = net.getComputationGraphConfiguration(); assertEquals(3, conf.getVertices().size()); - GravesLSTM l0 = (GravesLSTM) ((LayerVertex) conf.getVertices().get("0")).getLayerConf().getLayer(); + GravesLSTM l0 = (GravesLSTM) ((LayerVertex) conf.getVertices().get("0")).getNetConfiguration().getFirstLayer(); assertTrue(l0.getActivationFn() instanceof ActivationTanH); assertEquals(3, l0.getNIn()); assertEquals(4, l0.getNOut()); @@ -248,14 +248,14 @@ public class RegressionTest080 extends BaseDL4JTest { assertEquals(1.5, l0.getGradientNormalizationThreshold(), 1e-5); GravesBidirectionalLSTM l1 = - (GravesBidirectionalLSTM) ((LayerVertex) conf.getVertices().get("1")).getLayerConf().getLayer(); + (GravesBidirectionalLSTM) ((LayerVertex) conf.getVertices().get("1")).getNetConfiguration().getFirstLayer(); assertTrue(l1.getActivationFn() instanceof ActivationSoftSign); assertEquals(4, l1.getNIn()); assertEquals(4, l1.getNOut()); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization()); assertEquals(1.5, l1.getGradientNormalizationThreshold(), 1e-5); - RnnOutputLayer l2 = (RnnOutputLayer) ((LayerVertex) conf.getVertices().get("2")).getLayerConf().getLayer(); + RnnOutputLayer l2 = (RnnOutputLayer) ((LayerVertex) conf.getVertices().get("2")).getNetConfiguration().getFirstLayer(); assertEquals(4, l2.getNIn()); assertEquals(5, l2.getNOut()); assertTrue(l2.getActivationFn() instanceof ActivationSoftmax); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java index f294e16a7..a847a85ef 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100a.java @@ -86,30 +86,30 @@ public class RegressionTest100a extends BaseDL4JTest { File f = Resources.asFile("regression_testing/100a/GravesLSTMCharModelingExample_100a.bin"); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); - GravesLSTM l0 = (GravesLSTM) net.getLayer(0).conf().getLayer(); + GravesLSTM l0 = (GravesLSTM) 
net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(200, l0.getNOut()); assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); assertEquals(new WeightDecay(0.001, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new RmsProp(0.1), l0.getIUpdater()); - GravesLSTM l1 = (GravesLSTM) net.getLayer(1).conf().getLayer(); + GravesLSTM l1 = (GravesLSTM) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(200, l1.getNOut()); assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); assertEquals(new WeightDecay(0.001, false), TestUtils.getWeightDecayReg(l1)); assertEquals(new RmsProp(0.1), l1.getIUpdater()); - RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).conf().getLayer(); + RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).getLayerConfiguration(); assertEquals(new ActivationSoftmax(), l2.getActivationFn()); assertEquals(77, l2.getNOut()); assertEquals(new WeightInitXavier(), l2.getWeightInitFn()); assertEquals(new WeightDecay(0.001, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new RmsProp(0.1), l0.getIUpdater()); - assertEquals(BackpropType.TruncatedBPTT, net.getLayerWiseConfigurations().getBackpropType()); - assertEquals(50, net.getLayerWiseConfigurations().getTbpttBackLength()); - assertEquals(50, net.getLayerWiseConfigurations().getTbpttFwdLength()); + assertEquals(BackpropType.TruncatedBPTT, net.getConfiguration().getBackpropType()); + assertEquals(50, net.getConfiguration().getTbpttBackLength()); + assertEquals(50, net.getConfiguration().getTbpttFwdLength()); INDArray outExp; File f2 = Resources.asFile("regression_testing/100a/GravesLSTMCharModelingExample_Output_100a.bin"); @@ -134,7 +134,7 @@ public class RegressionTest100a extends BaseDL4JTest { File f = Resources.asFile("regression_testing/100a/VaeMNISTAnomaly_100a.bin"); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); - VariationalAutoencoder l0 = (VariationalAutoencoder) net.getLayer(0).conf().getLayer(); + VariationalAutoencoder l0 = (VariationalAutoencoder) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationLReLU(), l0.getActivationFn()); assertEquals(32, l0.getNOut()); assertArrayEquals(new int[]{256, 256}, l0.getEncoderLayerSizes()); @@ -171,7 +171,7 @@ public class RegressionTest100a extends BaseDL4JTest { int nBoxes = 5; int nClasses = 10; - ConvolutionLayer cl = (ConvolutionLayer)((LayerVertex)net.getComputationGraphConfiguration().getVertices().get("convolution2d_9")).getLayerConf().getLayer(); + ConvolutionLayer cl = (ConvolutionLayer)((LayerVertex)net.getComputationGraphConfiguration().getVertices().get("convolution2d_9")).getNetConfiguration().getFirstLayer(); assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); assertEquals(new ActivationIdentity(), cl.getActivationFn()); assertEquals(ConvolutionMode.Same, cl.getConvolutionMode()); @@ -195,8 +195,8 @@ public class RegressionTest100a extends BaseDL4JTest { //Which means: the record output doesn't have this. 
To account for this, we'll manually set eps to 0.0 here //https://github.com/deeplearning4j/deeplearning4j/issues/5836#issuecomment-405526228 for(Layer l : net.getLayers()){ - if(l.conf().getLayer() instanceof BatchNormalization){ - BatchNormalization bn = (BatchNormalization) l.conf().getLayer(); + if(l.getLayerConfiguration() instanceof BatchNormalization){ + BatchNormalization bn = (BatchNormalization) l.getLayerConfiguration(); bn.setEps(0.0); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java index 35fb7391b..23ae5d5bd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b3.java @@ -72,12 +72,12 @@ public class RegressionTest100b3 extends BaseDL4JTest { MultiLayerNetwork net = MultiLayerNetwork.load(f, true); // net = net.clone(); - DenseLayer l0 = (DenseLayer) net.getLayer(0).conf().getLayer(); + DenseLayer l0 = (DenseLayer) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(new WeightDecay(0.03, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new RmsProp(0.95), l0.getIUpdater()); - CustomLayer l1 = (CustomLayer) net.getLayer(1).conf().getLayer(); + CustomLayer l1 = (CustomLayer) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(new ActivationSigmoid(), l1.getSecondActivationFunction()); assertEquals(new RmsProp(0.95), l1.getIUpdater()); @@ -108,7 +108,7 @@ public class RegressionTest100b3 extends BaseDL4JTest { List activations = net.feedForward(in); - assertEquals(dt, net.getLayerWiseConfigurations().getDataType()); + assertEquals(dt, net.getConfiguration().getDataType()); assertEquals(dt, net.params().dataType()); assertEquals( outExp, outAct, dtype); } @@ -121,30 +121,30 @@ public class RegressionTest100b3 extends BaseDL4JTest { File f = Resources.asFile("regression_testing/100b3/GravesLSTMCharModelingExample_100b3.bin"); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); - LSTM l0 = (LSTM) net.getLayer(0).conf().getLayer(); + LSTM l0 = (LSTM) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(200, l0.getNOut()); assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); assertEquals(new WeightDecay(0.0001, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new Adam(0.005), l0.getIUpdater()); - LSTM l1 = (LSTM) net.getLayer(1).conf().getLayer(); + LSTM l1 = (LSTM) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(200, l1.getNOut()); assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); assertEquals(new WeightDecay(0.0001, false), TestUtils.getWeightDecayReg(l1)); assertEquals(new Adam(0.005), l1.getIUpdater()); - RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).conf().getLayer(); + RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).getLayerConfiguration(); assertEquals(new ActivationSoftmax(), l2.getActivationFn()); assertEquals(77, l2.getNOut()); assertEquals(new WeightInitXavier(), l2.getWeightInitFn()); assertEquals(new WeightDecay(0.0001, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new Adam(0.005), l0.getIUpdater()); - assertEquals(BackpropType.TruncatedBPTT, 
net.getLayerWiseConfigurations().getBackpropType()); - assertEquals(50, net.getLayerWiseConfigurations().getTbpttBackLength()); - assertEquals(50, net.getLayerWiseConfigurations().getTbpttFwdLength()); + assertEquals(BackpropType.TruncatedBPTT, net.getConfiguration().getBackpropType()); + assertEquals(50, net.getConfiguration().getTbpttBackLength()); + assertEquals(50, net.getConfiguration().getTbpttFwdLength()); INDArray outExp; File f2 = Resources.asFile("regression_testing/100b3/GravesLSTMCharModelingExample_Output_100b3.bin"); @@ -169,7 +169,7 @@ public class RegressionTest100b3 extends BaseDL4JTest { File f = Resources.asFile("regression_testing/100b3/VaeMNISTAnomaly_100b3.bin"); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); - VariationalAutoencoder l0 = (VariationalAutoencoder) net.getLayer(0).conf().getLayer(); + VariationalAutoencoder l0 = (VariationalAutoencoder) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationLReLU(), l0.getActivationFn()); assertEquals(32, l0.getNOut()); assertArrayEquals(new int[]{256, 256}, l0.getEncoderLayerSizes()); @@ -206,7 +206,7 @@ public class RegressionTest100b3 extends BaseDL4JTest { int nBoxes = 5; int nClasses = 10; - ConvolutionLayer cl = (ConvolutionLayer)((LayerVertex)net.getComputationGraphConfiguration().getVertices().get("convolution2d_9")).getLayerConf().getLayer(); + ConvolutionLayer cl = (ConvolutionLayer)((LayerVertex)net.getComputationGraphConfiguration().getVertices().get("convolution2d_9")).getNetConfiguration().getFirstLayer(); assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); assertEquals(new ActivationIdentity(), cl.getActivationFn()); assertEquals(ConvolutionMode.Same, cl.getConvolutionMode()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java index 00e46bf0c..fbbe55592 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java @@ -91,12 +91,12 @@ public class RegressionTest100b4 extends BaseDL4JTest { MultiLayerNetwork net = MultiLayerNetwork.load(f, true); // net = net.clone(); - DenseLayer l0 = (DenseLayer) net.getLayer(0).conf().getLayer(); + DenseLayer l0 = (DenseLayer) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(new L2Regularization(0.03), TestUtils.getL2Reg(l0)); assertEquals(new RmsProp(0.95), l0.getIUpdater()); - CustomLayer l1 = (CustomLayer) net.getLayer(1).conf().getLayer(); + CustomLayer l1 = (CustomLayer) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(new ActivationSigmoid(), l1.getSecondActivationFunction()); assertEquals(new RmsProp(0.95), l1.getIUpdater()); @@ -125,7 +125,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { INDArray outAct = net.output(in); assertEquals(dtype, outAct.dataType()); - assertEquals(dtype, net.getLayerWiseConfigurations().getDataType()); + assertEquals(dtype, net.getConfiguration().getDataType()); assertEquals(dtype, net.params().dataType()); boolean eq = outExp.equalsWithEps(outAct, 0.01); assertTrue(eq, "Test for dtype: " + dtypeName + "\n" + outExp + " vs " + outAct); @@ -139,30 +139,30 @@ public class RegressionTest100b4 extends BaseDL4JTest { File f = 
Resources.asFile("regression_testing/100b4/GravesLSTMCharModelingExample_100b4.bin"); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); - LSTM l0 = (LSTM) net.getLayer(0).conf().getLayer(); + LSTM l0 = (LSTM) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(200, l0.getNOut()); assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l0)); assertEquals(new Adam(0.005), l0.getIUpdater()); - LSTM l1 = (LSTM) net.getLayer(1).conf().getLayer(); + LSTM l1 = (LSTM) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(200, l1.getNOut()); assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l1)); assertEquals(new Adam(0.005), l1.getIUpdater()); - RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).conf().getLayer(); + RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).getLayerConfiguration(); assertEquals(new ActivationSoftmax(), l2.getActivationFn()); assertEquals(77, l2.getNOut()); assertEquals(new WeightInitXavier(), l2.getWeightInitFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l2)); assertEquals(new Adam(0.005), l2.getIUpdater()); - assertEquals(BackpropType.TruncatedBPTT, net.getLayerWiseConfigurations().getBackpropType()); - assertEquals(50, net.getLayerWiseConfigurations().getTbpttBackLength()); - assertEquals(50, net.getLayerWiseConfigurations().getTbpttFwdLength()); + assertEquals(BackpropType.TruncatedBPTT, net.getConfiguration().getBackpropType()); + assertEquals(50, net.getConfiguration().getTbpttBackLength()); + assertEquals(50, net.getConfiguration().getTbpttFwdLength()); INDArray outExp; File f2 = Resources.asFile("regression_testing/100b4/GravesLSTMCharModelingExample_Output_100b4.bin"); @@ -187,7 +187,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { File f = Resources.asFile("regression_testing/100b4/VaeMNISTAnomaly_100b4.bin"); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); - VariationalAutoencoder l0 = (VariationalAutoencoder) net.getLayer(0).conf().getLayer(); + VariationalAutoencoder l0 = (VariationalAutoencoder) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationLReLU(), l0.getActivationFn()); assertEquals(32, l0.getNOut()); assertArrayEquals(new int[]{256, 256}, l0.getEncoderLayerSizes()); @@ -225,7 +225,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { int nClasses = 10; ConvolutionLayer cl = (ConvolutionLayer) ((LayerVertex) net.getComputationGraphConfiguration().getVertices() - .get("convolution2d_9")).getLayerConf().getLayer(); + .get("convolution2d_9")).getNetConfiguration().getFirstLayer(); assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); assertEquals(new ActivationIdentity(), cl.getActivationFn()); assertEquals(ConvolutionMode.Same, cl.getConvolutionMode()); @@ -257,7 +257,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { File f = Resources.asFile("regression_testing/100b4/SyntheticCNN_100b4.bin"); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); - ConvolutionLayer l0 = (ConvolutionLayer) net.getLayer(0).conf().getLayer(); + ConvolutionLayer l0 = (ConvolutionLayer) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationReLU(), l0.getActivationFn()); assertEquals(4, l0.getNOut()); assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); @@ -268,7 +268,7 @@ public class RegressionTest100b4 extends 
BaseDL4JTest { assertArrayEquals(new int[]{1, 1}, l0.getDilation()); assertArrayEquals(new int[]{0, 0}, l0.getPadding()); - SeparableConvolution2D l1 = (SeparableConvolution2D) net.getLayer(1).conf().getLayer(); + SeparableConvolution2D l1 = (SeparableConvolution2D) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationReLU(), l1.getActivationFn()); assertEquals(8, l1.getNOut()); assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); @@ -281,20 +281,20 @@ public class RegressionTest100b4 extends BaseDL4JTest { assertEquals(ConvolutionMode.Same, l1.getConvolutionMode()); assertEquals(1, l1.getDepthMultiplier()); - SubsamplingLayer l2 = (SubsamplingLayer) net.getLayer(2).conf().getLayer(); + SubsamplingLayer l2 = (SubsamplingLayer) net.getLayer(2).getLayerConfiguration(); assertArrayEquals(new int[]{3, 3}, l2.getKernelSize()); assertArrayEquals(new int[]{2, 2}, l2.getStride()); assertArrayEquals(new int[]{1, 1}, l2.getDilation()); assertArrayEquals(new int[]{0, 0}, l2.getPadding()); assertEquals(PoolingType.MAX, l2.getPoolingType()); - ZeroPaddingLayer l3 = (ZeroPaddingLayer) net.getLayer(3).conf().getLayer(); + ZeroPaddingLayer l3 = (ZeroPaddingLayer) net.getLayer(3).getLayerConfiguration(); assertArrayEquals(new int[]{4, 4, 4, 4}, l3.getPadding()); - Upsampling2D l4 = (Upsampling2D) net.getLayer(4).conf().getLayer(); + Upsampling2D l4 = (Upsampling2D) net.getLayer(4).getLayerConfiguration(); assertArrayEquals(new int[]{3, 3}, l4.getSize()); - DepthwiseConvolution2D l5 = (DepthwiseConvolution2D) net.getLayer(5).conf().getLayer(); + DepthwiseConvolution2D l5 = (DepthwiseConvolution2D) net.getLayer(5).getLayerConfiguration(); assertEquals(new ActivationReLU(), l5.getActivationFn()); assertEquals(16, l5.getNOut()); assertEquals(new WeightInitXavier(), l5.getWeightInitFn()); @@ -306,17 +306,17 @@ public class RegressionTest100b4 extends BaseDL4JTest { assertArrayEquals(new int[]{0, 0}, l5.getPadding()); assertEquals(2, l5.getDepthMultiplier()); - SubsamplingLayer l6 = (SubsamplingLayer) net.getLayer(6).conf().getLayer(); + SubsamplingLayer l6 = (SubsamplingLayer) net.getLayer(6).getLayerConfiguration(); assertArrayEquals(new int[]{2, 2}, l6.getKernelSize()); assertArrayEquals(new int[]{2, 2}, l6.getStride()); assertArrayEquals(new int[]{1, 1}, l6.getDilation()); assertArrayEquals(new int[]{0, 0}, l6.getPadding()); assertEquals(PoolingType.MAX, l6.getPoolingType()); - Cropping2D l7 = (Cropping2D) net.getLayer(7).conf().getLayer(); + Cropping2D l7 = (Cropping2D) net.getLayer(7).getLayerConfiguration(); assertArrayEquals(new int[]{3, 3, 2, 2}, l7.getCropping()); - ConvolutionLayer l8 = (ConvolutionLayer) net.getLayer(8).conf().getLayer(); + ConvolutionLayer l8 = (ConvolutionLayer) net.getLayer(8).getLayerConfiguration(); assertEquals(4, l8.getNOut()); assertEquals(new WeightInitXavier(), l8.getWeightInitFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l8)); @@ -326,7 +326,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { assertArrayEquals(new int[]{1, 1}, l8.getDilation()); assertArrayEquals(new int[]{0, 0}, l8.getPadding()); - CnnLossLayer l9 = (CnnLossLayer) net.getLayer(9).conf().getLayer(); + CnnLossLayer l9 = (CnnLossLayer) net.getLayer(9).getLayerConfiguration(); assertEquals(new WeightInitXavier(), l9.getWeightInitFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l9)); assertEquals(new Adam(0.005), l9.getIUpdater()); @@ -361,7 +361,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { File f = 
Resources.asFile("regression_testing/100b4/SyntheticBidirectionalRNNGraph_100b4.bin"); ComputationGraph net = ComputationGraph.load(f, true); - Bidirectional l0 = (Bidirectional) net.getLayer("rnn1").conf().getLayer(); + Bidirectional l0 = (Bidirectional) net.getLayer("rnn1").getLayerConfiguration(); LSTM l1 = (LSTM) l0.getFwd(); assertEquals(16, l1.getNOut()); @@ -373,7 +373,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { assertEquals(new ActivationReLU(), l2.getActivationFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l2)); - Bidirectional l3 = (Bidirectional) net.getLayer("rnn2").conf().getLayer(); + Bidirectional l3 = (Bidirectional) net.getLayer("rnn2").getLayerConfiguration(); SimpleRnn l4 = (SimpleRnn) l3.getFwd(); assertEquals(16, l4.getNOut()); @@ -387,12 +387,12 @@ public class RegressionTest100b4 extends BaseDL4JTest { MergeVertex mv = (MergeVertex) net.getVertex("concat"); - GlobalPoolingLayer gpl = (GlobalPoolingLayer) net.getLayer("pooling").conf().getLayer(); + GlobalPoolingLayer gpl = (GlobalPoolingLayer) net.getLayer("pooling").getLayerConfiguration(); assertEquals(PoolingType.MAX, gpl.getPoolingType()); assertArrayEquals(new int[]{2}, gpl.getPoolingDimensions()); assertTrue(gpl.isCollapseDimensions()); - OutputLayer outl = (OutputLayer) net.getLayer("out").conf().getLayer(); + OutputLayer outl = (OutputLayer) net.getLayer("out").getLayerConfiguration(); assertEquals(3, outl.getNOut()); assertEquals(new LossMCXENT(), outl.getLossFn()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java index 15a9c2bc3..979518196 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java @@ -73,12 +73,12 @@ public class RegressionTest100b6 extends BaseDL4JTest { MultiLayerNetwork net = MultiLayerNetwork.load(f, true); // net = net.clone(); - DenseLayer l0 = (DenseLayer) net.getLayer(0).conf().getLayer(); + DenseLayer l0 = (DenseLayer) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(new L2Regularization(0.03), TestUtils.getL2Reg(l0)); assertEquals(new RmsProp(0.95), l0.getIUpdater()); - CustomLayer l1 = (CustomLayer) net.getLayer(1).conf().getLayer(); + CustomLayer l1 = (CustomLayer) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(new ActivationSigmoid(), l1.getSecondActivationFunction()); assertEquals(new RmsProp(0.95), l1.getIUpdater()); @@ -107,7 +107,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { INDArray outAct = net.output(in); assertEquals(dtype, outAct.dataType()); - assertEquals(dtype, net.getLayerWiseConfigurations().getDataType()); + assertEquals(dtype, net.getConfiguration().getDataType()); assertEquals(dtype, net.params().dataType()); boolean eq = outExp.equalsWithEps(outAct, 0.01); assertTrue( eq, "Test for dtype: " + dtypeName + " - " + outExp + " vs " + outAct); @@ -121,30 +121,30 @@ public class RegressionTest100b6 extends BaseDL4JTest { File f = Resources.asFile("regression_testing/100b6/GravesLSTMCharModelingExample_100b6.bin"); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); - LSTM l0 = (LSTM) net.getLayer(0).conf().getLayer(); + LSTM l0 = (LSTM) 
net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(200, l0.getNOut()); assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l0)); assertEquals(new Adam(0.005), l0.getIUpdater()); - LSTM l1 = (LSTM) net.getLayer(1).conf().getLayer(); + LSTM l1 = (LSTM) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(200, l1.getNOut()); assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l1)); assertEquals(new Adam(0.005), l1.getIUpdater()); - RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).conf().getLayer(); + RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).getLayerConfiguration(); assertEquals(new ActivationSoftmax(), l2.getActivationFn()); assertEquals(77, l2.getNOut()); assertEquals(new WeightInitXavier(), l2.getWeightInitFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l2)); assertEquals(new Adam(0.005), l2.getIUpdater()); - assertEquals(BackpropType.TruncatedBPTT, net.getLayerWiseConfigurations().getBackpropType()); - assertEquals(50, net.getLayerWiseConfigurations().getTbpttBackLength()); - assertEquals(50, net.getLayerWiseConfigurations().getTbpttFwdLength()); + assertEquals(BackpropType.TruncatedBPTT, net.getConfiguration().getBackpropType()); + assertEquals(50, net.getConfiguration().getTbpttBackLength()); + assertEquals(50, net.getConfiguration().getTbpttFwdLength()); INDArray outExp; File f2 = Resources.asFile("regression_testing/100b6/GravesLSTMCharModelingExample_Output_100b6.bin"); @@ -169,7 +169,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { File f = Resources.asFile("regression_testing/100b6/VaeMNISTAnomaly_100b6.bin"); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); - VariationalAutoencoder l0 = (VariationalAutoencoder) net.getLayer(0).conf().getLayer(); + VariationalAutoencoder l0 = (VariationalAutoencoder) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationLReLU(), l0.getActivationFn()); assertEquals(32, l0.getNOut()); assertArrayEquals(new int[]{256, 256}, l0.getEncoderLayerSizes()); @@ -206,7 +206,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { int nClasses = 10; ConvolutionLayer cl = (ConvolutionLayer) ((LayerVertex) net.getComputationGraphConfiguration().getVertices() - .get("convolution2d_9")).getLayerConf().getLayer(); + .get("convolution2d_9")).getNetConfiguration().getFirstLayer(); assertEquals(nBoxes * (5 + nClasses), cl.getNOut()); assertEquals(new ActivationIdentity(), cl.getActivationFn()); assertEquals(ConvolutionMode.Same, cl.getConvolutionMode()); @@ -237,7 +237,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { File f = Resources.asFile("regression_testing/100b6/SyntheticCNN_100b6.bin"); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); - ConvolutionLayer l0 = (ConvolutionLayer) net.getLayer(0).conf().getLayer(); + ConvolutionLayer l0 = (ConvolutionLayer) net.getLayer(0).getLayerConfiguration(); assertEquals(new ActivationReLU(), l0.getActivationFn()); assertEquals(4, l0.getNOut()); assertEquals(new WeightInitXavier(), l0.getWeightInitFn()); @@ -248,7 +248,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { assertArrayEquals(new int[]{1, 1}, l0.getDilation()); assertArrayEquals(new int[]{0, 0}, l0.getPadding()); - SeparableConvolution2D l1 = (SeparableConvolution2D) net.getLayer(1).conf().getLayer(); + 
SeparableConvolution2D l1 = (SeparableConvolution2D) net.getLayer(1).getLayerConfiguration(); assertEquals(new ActivationReLU(), l1.getActivationFn()); assertEquals(8, l1.getNOut()); assertEquals(new WeightInitXavier(), l1.getWeightInitFn()); @@ -261,20 +261,20 @@ public class RegressionTest100b6 extends BaseDL4JTest { assertEquals(ConvolutionMode.Same, l1.getConvolutionMode()); assertEquals(1, l1.getDepthMultiplier()); - SubsamplingLayer l2 = (SubsamplingLayer) net.getLayer(2).conf().getLayer(); + SubsamplingLayer l2 = (SubsamplingLayer) net.getLayer(2).getLayerConfiguration(); assertArrayEquals(new int[]{3, 3}, l2.getKernelSize()); assertArrayEquals(new int[]{2, 2}, l2.getStride()); assertArrayEquals(new int[]{1, 1}, l2.getDilation()); assertArrayEquals(new int[]{0, 0}, l2.getPadding()); assertEquals(PoolingType.MAX, l2.getPoolingType()); - ZeroPaddingLayer l3 = (ZeroPaddingLayer) net.getLayer(3).conf().getLayer(); + ZeroPaddingLayer l3 = (ZeroPaddingLayer) net.getLayer(3).getLayerConfiguration(); assertArrayEquals(new int[]{4, 4, 4, 4}, l3.getPadding()); - Upsampling2D l4 = (Upsampling2D) net.getLayer(4).conf().getLayer(); + Upsampling2D l4 = (Upsampling2D) net.getLayer(4).getLayerConfiguration(); assertArrayEquals(new int[]{3, 3}, l4.getSize()); - DepthwiseConvolution2D l5 = (DepthwiseConvolution2D) net.getLayer(5).conf().getLayer(); + DepthwiseConvolution2D l5 = (DepthwiseConvolution2D) net.getLayer(5).getLayerConfiguration(); assertEquals(new ActivationReLU(), l5.getActivationFn()); assertEquals(16, l5.getNOut()); assertEquals(new WeightInitXavier(), l5.getWeightInitFn()); @@ -286,17 +286,17 @@ public class RegressionTest100b6 extends BaseDL4JTest { assertArrayEquals(new int[]{0, 0}, l5.getPadding()); assertEquals(2, l5.getDepthMultiplier()); - SubsamplingLayer l6 = (SubsamplingLayer) net.getLayer(6).conf().getLayer(); + SubsamplingLayer l6 = (SubsamplingLayer) net.getLayer(6).getLayerConfiguration(); assertArrayEquals(new int[]{2, 2}, l6.getKernelSize()); assertArrayEquals(new int[]{2, 2}, l6.getStride()); assertArrayEquals(new int[]{1, 1}, l6.getDilation()); assertArrayEquals(new int[]{0, 0}, l6.getPadding()); assertEquals(PoolingType.MAX, l6.getPoolingType()); - Cropping2D l7 = (Cropping2D) net.getLayer(7).conf().getLayer(); + Cropping2D l7 = (Cropping2D) net.getLayer(7).getLayerConfiguration(); assertArrayEquals(new int[]{3, 3, 2, 2}, l7.getCropping()); - ConvolutionLayer l8 = (ConvolutionLayer) net.getLayer(8).conf().getLayer(); + ConvolutionLayer l8 = (ConvolutionLayer) net.getLayer(8).getLayerConfiguration(); assertEquals(4, l8.getNOut()); assertEquals(new WeightInitXavier(), l8.getWeightInitFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l8)); @@ -306,7 +306,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { assertArrayEquals(new int[]{1, 1}, l8.getDilation()); assertArrayEquals(new int[]{0, 0}, l8.getPadding()); - CnnLossLayer l9 = (CnnLossLayer) net.getLayer(9).conf().getLayer(); + CnnLossLayer l9 = (CnnLossLayer) net.getLayer(9).getLayerConfiguration(); assertEquals(new WeightInitXavier(), l9.getWeightInitFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l9)); assertEquals(new Adam(0.005), l9.getIUpdater()); @@ -341,7 +341,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { File f = Resources.asFile("regression_testing/100b6/SyntheticBidirectionalRNNGraph_100b6.bin"); ComputationGraph net = ComputationGraph.load(f, true); - Bidirectional l0 = (Bidirectional) net.getLayer("rnn1").conf().getLayer(); + Bidirectional 
l0 = (Bidirectional) net.getLayer("rnn1").getLayerConfiguration(); LSTM l1 = (LSTM) l0.getFwd(); assertEquals(16, l1.getNOut()); @@ -353,7 +353,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { assertEquals(new ActivationReLU(), l2.getActivationFn()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l2)); - Bidirectional l3 = (Bidirectional) net.getLayer("rnn2").conf().getLayer(); + Bidirectional l3 = (Bidirectional) net.getLayer("rnn2").getLayerConfiguration(); SimpleRnn l4 = (SimpleRnn) l3.getFwd(); assertEquals(16, l4.getNOut()); @@ -367,12 +367,12 @@ public class RegressionTest100b6 extends BaseDL4JTest { MergeVertex mv = (MergeVertex) net.getVertex("concat"); - GlobalPoolingLayer gpl = (GlobalPoolingLayer) net.getLayer("pooling").conf().getLayer(); + GlobalPoolingLayer gpl = (GlobalPoolingLayer) net.getLayer("pooling").getLayerConfiguration(); assertEquals(PoolingType.MAX, gpl.getPoolingType()); assertArrayEquals(new int[]{2}, gpl.getPoolingDimensions()); assertTrue(gpl.isCollapseDimensions()); - OutputLayer outl = (OutputLayer) net.getLayer("out").conf().getLayer(); + OutputLayer outl = (OutputLayer) net.getLayer("out").getLayerConfiguration(); assertEquals(3, outl.getNOut()); assertEquals(new LossMCXENT(), outl.getLossFn()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java index acb3963b1..b8b3cdad6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java @@ -85,9 +85,9 @@ public class CustomLayer extends FeedForwardLayer { //Initialize the layer parameters. For example, // Note that the entries in paramTable (2 entries here: a weight array of shape [nIn,nOut] and biases of shape [1,nOut] // are in turn a view of the 'layerParamsView' array. 
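// Editor's illustrative sketch (not part of the patch): the hunk below renames the wiring calls in
// CustomLayer.instantiate(). Assuming the renamed accessors used throughout this change set
// (initializer().init(this, ...) and setLayerConfiguration(...)), a minimal instantiate() body would
// look roughly like the following. CustomLayerImpl, layerParamsView, trainingListeners, layerIndex and
// networkDataType are taken from the surrounding example code; the exact signatures are assumptions,
// not documented API.
CustomLayerImpl myCustomLayer = new CustomLayerImpl(conf, networkDataType);
myCustomLayer.setListeners(trainingListeners);            // propagate any training listeners
myCustomLayer.setIndex(layerIndex);                       // position of this layer in the network
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
myCustomLayer.setParamTable(paramTable);                  // entries are views of layerParamsView
myCustomLayer.setLayerConfiguration(conf);                // replaces the former setConf(conf)
return myCustomLayer;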
- Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); myCustomLayer.setParamTable(paramTable); - myCustomLayer.setConf(conf); + myCustomLayer.setLayerConfiguration(conf); return myCustomLayer; } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayerImpl.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayerImpl.java index 18c0ab8e0..42b91d908 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayerImpl.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayerImpl.java @@ -56,7 +56,7 @@ public class CustomLayerImpl extends BaseLayer { //Generic paramete INDArray secondHalf = output.get(NDArrayIndex.all(), NDArrayIndex.interval(columns / 2, columns)); IActivation activation1 = layerConf().getActivationFn(); - IActivation activation2 = ((CustomLayer) conf.getLayer()).getSecondActivationFunction(); + IActivation activation2 = ((CustomLayer) layerConfiguration.getFirstLayer()).getSecondActivationFunction(); //IActivation function instances modify the activation functions in-place activation1.getActivation(firstHalf, training); @@ -105,7 +105,7 @@ public class CustomLayerImpl extends BaseLayer { //Generic paramete INDArray epsilonSecondHalf = epsilon.get(NDArrayIndex.all(), NDArrayIndex.interval(columns / 2, columns)); IActivation activation1 = layerConf().getActivationFn(); - IActivation activation2 = ((CustomLayer) conf.getLayer()).getSecondActivationFunction(); + IActivation activation2 = ((CustomLayer) layerConfiguration.getFirstLayer()).getSecondActivationFunction(); //IActivation backprop method modifies the 'firstHalf' and 'secondHalf' arrays in-place, to contain dL/dz activation1.backprop(firstHalf, epsilonFirstHalf); @@ -127,7 +127,7 @@ public class CustomLayerImpl extends BaseLayer { //Generic paramete ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad); ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad); - INDArray epsilonNext = params.get(DefaultParamInitializer.WEIGHT_KEY).mmul(activationDerivative.transpose()).transpose(); + INDArray epsilonNext = paramsTable.get(DefaultParamInitializer.WEIGHT_KEY).mmul(activationDerivative.transpose()).transpose(); return new Pair<>(ret, epsilonNext); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java index 73610f45e..03b8192f4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java @@ -23,7 +23,6 @@ package org.deeplearning4j.samediff; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -151,7 +150,7 @@ public class CompareTrainingImplementations extends BaseDL4JTest { //Create equivalent DL4J net - MultiLayerConfiguration mlc 
= new NeuralNetConfiguration.Builder() + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .weightInit(WeightInit.XAVIER).seed(12345) .l1(l1Val).l2(l2Val) @@ -165,7 +164,7 @@ public class CompareTrainingImplementations extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(mlc); net.init(); - Map oldParams = net.paramTable(); + Map oldParams = net.getParamTable(); //Assign parameters so we have identical models at the start: w0.getArr().assign(net.getParam("0_W")); @@ -215,7 +214,7 @@ public class CompareTrainingImplementations extends BaseDL4JTest { //Check training with updater - mlc = new NeuralNetConfiguration.Builder() + mlc = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .weightInit(WeightInit.XAVIER).seed(12345) .l1(l1Val).l2(l2Val) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java index 8bfaa9eb2..49a9c7fa1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java @@ -24,7 +24,6 @@ import org.apache.commons.io.FileUtils; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.EarlyTerminationDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -80,8 +79,8 @@ public class CrashReportingUtilTest extends BaseDL4JTest { int width = 28; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new NoOp()) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new NoOp()) .dist(new NormalDistribution(0, 1)) .list().layer(0, @@ -99,7 +98,7 @@ public class CrashReportingUtilTest extends BaseDL4JTest { .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nOut(10).build()) - .setInputType(InputType.convolutionalFlat(height, width, + .inputType(InputType.convolutionalFlat(height, width, inputDepth)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java index 2ff1c481d..02a1fdaf5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java @@ -20,12 +20,11 @@ package org.deeplearning4j.util; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.compress.utils.IOUtils; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.core.util.ModelGuesser; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -60,11 +59,11 @@ public class ModelGuesserTest extends BaseDL4JTest { public void testModelGuessFile() throws Exception { File f = 
Resources.asFile("modelimport/keras/examples/mnist_mlp/mnist_mlp_tf_keras_1_model.h5"); assertTrue(f.exists()); - Model guess1 = ModelGuesser.loadModelGuess(f.getAbsolutePath()); + IModel guess1 = ModelGuesser.loadModelGuess(f.getAbsolutePath()); Assertions.assertNotNull(guess1); f = Resources.asFile("modelimport/keras/examples/mnist_cnn/mnist_cnn_tf_keras_1_model.h5"); assertTrue(f.exists()); - Model guess2 = ModelGuesser.loadModelGuess(f.getAbsolutePath()); + IModel guess2 = ModelGuesser.loadModelGuess(f.getAbsolutePath()); Assertions.assertNotNull(guess2); } @@ -75,7 +74,7 @@ public class ModelGuesserTest extends BaseDL4JTest { assertTrue(f.exists()); try (InputStream inputStream = new FileInputStream(f)) { - Model guess1 = ModelGuesser.loadModelGuess(inputStream); + IModel guess1 = ModelGuesser.loadModelGuess(inputStream); Assertions.assertNotNull(guess1); } @@ -83,7 +82,7 @@ public class ModelGuesserTest extends BaseDL4JTest { assertTrue(f.exists()); try (InputStream inputStream = new FileInputStream(f)) { - Model guess1 = ModelGuesser.loadModelGuess(inputStream); + IModel guess1 = ModelGuesser.loadModelGuess(inputStream); Assertions.assertNotNull(guess1); } } @@ -101,7 +100,7 @@ public class ModelGuesserTest extends BaseDL4JTest { NormalizerMinMaxScaler normalizer = new NormalizerMinMaxScaler(0, 1); normalizer.fit(new DataSet(Nd4j.rand(2, 2), Nd4j.rand(2, 2))); ModelSerializer.addNormalizerToModel(tempFile, normalizer); - Model model = ModelGuesser.loadModelGuess(tempFile.getAbsolutePath()); + IModel model = ModelGuesser.loadModelGuess(tempFile.getAbsolutePath()); Normalizer normalizer1 = ModelGuesser.loadNormalizer(tempFile.getAbsolutePath()); assertEquals(model, net); assertEquals(normalizer, normalizer1); @@ -119,7 +118,7 @@ public class ModelGuesserTest extends BaseDL4JTest { normalizer.fit(new DataSet(Nd4j.rand(2, 2), Nd4j.rand(2, 2))); ModelSerializer.writeModel(net, tempFile, true,normalizer); - Model model = ModelGuesser.loadModelGuess(tempFile.getAbsolutePath()); + IModel model = ModelGuesser.loadModelGuess(tempFile.getAbsolutePath()); Normalizer normalizer1 = ModelGuesser.loadNormalizer(tempFile.getAbsolutePath()); assertEquals(model, net); assertEquals(normalizer, normalizer1); @@ -137,7 +136,7 @@ public class ModelGuesserTest extends BaseDL4JTest { NormalizerMinMaxScaler normalizer = new NormalizerMinMaxScaler(0, 1); normalizer.fit(new DataSet(Nd4j.rand(2, 2), Nd4j.rand(2, 2))); ModelSerializer.addNormalizerToModel(tempFile, normalizer); - Model model = ModelGuesser.loadModelGuess(tempFile.getAbsolutePath()); + IModel model = ModelGuesser.loadModelGuess(tempFile.getAbsolutePath()); try (InputStream inputStream = new FileInputStream(tempFile)) { Normalizer normalizer1 = ModelGuesser.loadNormalizer(inputStream); assertEquals(model, net); @@ -156,7 +155,7 @@ public class ModelGuesserTest extends BaseDL4JTest { ModelSerializer.writeModel(net, tempFile, true); MultiLayerNetwork network = (MultiLayerNetwork) ModelGuesser.loadModelGuess(tempFile.getAbsolutePath()); - assertEquals(network.getLayerWiseConfigurations().toJson(), net.getLayerWiseConfigurations().toJson()); + assertEquals(network.getConfiguration().toJson(), net.getConfiguration().toJson()); assertEquals(net.params(), network.params()); assertEquals(net.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); @@ -173,7 +172,7 @@ public class ModelGuesserTest extends BaseDL4JTest { try (InputStream inputStream = new FileInputStream(tempFile)) { MultiLayerNetwork network = (MultiLayerNetwork) 
ModelGuesser.loadModelGuess(inputStream); Assertions.assertNotNull(network); - assertEquals(network.getLayerWiseConfigurations().toJson(), net.getLayerWiseConfigurations().toJson()); + assertEquals(network.getConfiguration().toJson(), net.getConfiguration().toJson()); assertEquals(net.params(), network.params()); assertEquals(net.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); } @@ -187,7 +186,7 @@ public class ModelGuesserTest extends BaseDL4JTest { File f = getTempFile(resource); String configFilename = f.getAbsolutePath(); Object conf = ModelGuesser.loadConfigGuess(configFilename); - assertTrue(conf instanceof MultiLayerConfiguration); + assertTrue(conf instanceof NeuralNetConfiguration); ClassPathResource sequenceResource = new ClassPathResource("/keras/simple/mlp_fapi_multiloss_config.json"); File f2 = getTempFile(sequenceResource); @@ -212,7 +211,7 @@ public class ModelGuesserTest extends BaseDL4JTest { try (InputStream inputStream = new FileInputStream(f)) { Object conf = ModelGuesser.loadConfigGuess(inputStream); - assertTrue(conf instanceof MultiLayerConfiguration); + assertTrue(conf instanceof NeuralNetConfiguration); } ClassPathResource sequenceResource = new ClassPathResource("/keras/simple/mlp_fapi_multiloss_config.json"); @@ -249,7 +248,7 @@ public class ModelGuesserTest extends BaseDL4JTest { int nIn = 5; int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01).l2(0.01) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01).l2(0.01) .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java index e01d42f01..9f52ae300 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java @@ -26,7 +26,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -65,7 +64,7 @@ public class ModelSerializerTest extends BaseDL4JTest { int nIn = 5; int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() @@ -81,7 +80,7 @@ public class ModelSerializerTest extends BaseDL4JTest { MultiLayerNetwork network = ModelSerializer.restoreMultiLayerNetwork(tempFile); - assertEquals(network.getLayerWiseConfigurations().toJson(), net.getLayerWiseConfigurations().toJson()); + 
assertEquals(network.getConfiguration().toJson(), net.getConfiguration().toJson()); assertEquals(net.params(), network.params()); assertEquals(net.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); } @@ -91,7 +90,7 @@ public class ModelSerializerTest extends BaseDL4JTest { int nIn = 5; int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() @@ -125,7 +124,7 @@ public class ModelSerializerTest extends BaseDL4JTest { MultiLayerNetwork network = ModelSerializer.restoreMultiLayerNetwork(fis); - assertEquals(network.getLayerWiseConfigurations().toJson(), net.getLayerWiseConfigurations().toJson()); + assertEquals(network.getConfiguration().toJson(), net.getConfiguration().toJson()); assertEquals(net.params(), network.params()); assertEquals(net.getUpdater().getStateViewArray(), network.getUpdater().getStateViewArray()); } @@ -133,7 +132,7 @@ public class ModelSerializerTest extends BaseDL4JTest { @Test public void testWriteCGModel() throws Exception { - ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", @@ -158,7 +157,7 @@ public class ModelSerializerTest extends BaseDL4JTest { @Test public void testWriteCGModelInputStream() throws Exception { - ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", @@ -189,7 +188,7 @@ public class ModelSerializerTest extends BaseDL4JTest { } private ComputationGraph simpleComputationGraph() { - ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", @@ -253,7 +252,7 @@ public class ModelSerializerTest extends BaseDL4JTest { @Test public void testInvalidLoading1() throws Exception { - ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in") .addLayer("out",new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -282,7 +281,7 @@ public class ModelSerializerTest extends BaseDL4JTest { int nIn = 5; int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .l2(0.01).updater(new 
Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() @@ -310,7 +309,7 @@ public class ModelSerializerTest extends BaseDL4JTest { int nIn = 5; int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .list() .layer(new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build()) .build(); @@ -357,7 +356,7 @@ public class ModelSerializerTest extends BaseDL4JTest { int nIn = 5; int nOut = 6; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .graphBuilder() .addInputs("in") .layer("0", new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build(), "in") @@ -406,7 +405,7 @@ public class ModelSerializerTest extends BaseDL4JTest { int nIn = 5; int nOut = 6; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .graphBuilder() .addInputs("in") .layer("0", new OutputLayer.Builder().nIn(nIn).nOut(nOut).build(), "in") @@ -433,7 +432,7 @@ public class ModelSerializerTest extends BaseDL4JTest { int nIn = 5; int nOut = 6; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .list() .layer(0, new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build()) .build(); @@ -458,7 +457,7 @@ public class ModelSerializerTest extends BaseDL4JTest { int nIn = 5; int nOut = 6; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).l1(0.01) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .graphBuilder() .addInputs("in") .layer("0", new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build(), "in") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelValidatorTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelValidatorTests.java index 9d6a27183..eef3472d2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelValidatorTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelValidatorTests.java @@ -23,7 +23,6 @@ package org.deeplearning4j.util; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -167,7 +166,7 @@ public class ModelValidatorTests extends BaseDL4JTest { assertFalse(vr6.isValid()); s = vr6.getIssues().get(0); assertEquals(1, vr6.getIssues().size()); - assertTrue(s.contains("JSON") && s.contains("valid") && s.contains("MultiLayerConfiguration"), s); + assertTrue(s.contains("JSON") && s.contains("valid") && s.contains("NeuralNetConfiguration"), s); assertEquals("MultiLayerNetwork", vr6.getFormatType()); assertEquals(MultiLayerNetwork.class, 
vr6.getFormatClass()); assertNotNull(vr6.getException()); @@ -296,7 +295,7 @@ public class ModelValidatorTests extends BaseDL4JTest { public static MultiLayerNetwork getSimpleNet(){ - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .updater(new Adam(0.01)) .list() diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java index 601237b53..810dbce85 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java @@ -27,13 +27,12 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLambdaLayer; import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfigurationFactory; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; -import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasRegularizerUtils; import org.nd4j.common.util.ArrayUtil; @@ -57,7 +56,7 @@ public class KerasLayer { protected DimOrder dimOrder; // Keras layer backend dimension order protected List inboundLayerNames; // List of inbound layers protected List outboundLayerNames; //List of outbound layers - protected Layer layer; // Resulting DL4J layer + protected LayerConfiguration layer; // Resulting DL4J layer protected GraphVertex vertex; // Resulting DL4J vertex protected Map weights; // Weights protected double weightL1Regularization = 0.0; // L1 regularization @@ -302,7 +301,7 @@ public class KerasLayer { */ public void copyWeightsToLayer(org.deeplearning4j.nn.api.Layer layer) throws InvalidKerasConfigurationException { if (this.getNumParams() > 0) { - String dl4jLayerName = layer.conf().getLayer().getLayerName(); + String dl4jLayerName = layer.getLayerConfiguration().getLayerName(); String kerasLayerName = this.getLayerName(); String msg = "Error when attempting to copy weights from Keras layer " + kerasLayerName + " to DL4J layer " + dl4jLayerName; @@ -310,7 +309,7 @@ public class KerasLayer { if (getWeights() == null) throw new InvalidKerasConfigurationException(msg + "(weights is null)"); - Set paramsInLayer = new HashSet<>(layer.paramTable().keySet()); + Set paramsInLayer = new HashSet<>(layer.getParamTable().keySet()); Set paramsInKerasLayer = new HashSet<>(this.weights.keySet()); /* Check for parameters in layer for which we don't have weights. */ @@ -322,7 +321,7 @@ public class KerasLayer { } /* Check for parameters NOT in layer for which we DO have weights. 
*/ - paramsInKerasLayer.removeAll(layer.paramTable().keySet()); + paramsInKerasLayer.removeAll(layer.getParamTable().keySet()); if (!paramsInKerasLayer.isEmpty()) { String joinedParamsInKerasLayer = StringUtils.join(paramsInKerasLayer, ", "); throw new InvalidKerasConfigurationException( @@ -330,9 +329,9 @@ public class KerasLayer { } /* Copy weights. */ - for (String paramName : layer.paramTable().keySet()) { + for (String paramName : layer.getParamTable().keySet()) { try { - long[] dl4jWeights = layer.paramTable().get(paramName).shape(); + long[] dl4jWeights = layer.getParamTable().get(paramName).shape(); long[] kerasWeights = weights.get(paramName).shape(); INDArray variable = this.weights.get(paramName); if(!Arrays.equals(dl4jWeights,kerasWeights) && @@ -348,7 +347,7 @@ public class KerasLayer { log.error(e.getMessage()); throw new InvalidKerasConfigurationException(e.getMessage() + "\nTried to set weights for layer with name " + this.getLayerName() - + ", of " + layer.conf().getLayer().getClass() + ".\n" + + ", of " + layer.getLayerConfiguration().getClass() + ".\n" + "Failed to set weights for parameter " + paramName + "\n" + "Expected shape for this parameter: " + layer.getParam(paramName).shapeInfoToString() + ", \ngot: " + this.weights.get(paramName).shapeInfoToString()); @@ -372,11 +371,11 @@ public class KerasLayer { * @return DL4J ILayer * @see org.deeplearning4j.nn.api.Layer */ - public Layer getLayer() { + public LayerConfiguration getLayer() { return this.layer; } - public void setLayer(Layer layer){ + public void setLayer(LayerConfiguration layer){ this.layer = layer; } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java index ea0b99f0c..4ce518eac 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java @@ -22,11 +22,10 @@ package org.deeplearning4j.nn.modelimport.keras; import lombok.Data; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.collections4.set.ListOrderedSet; import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.graph.PreprocessorVertex; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLambdaLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; @@ -44,13 +43,10 @@ import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelBuilder; import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasOptimizerUtils; import org.deeplearning4j.util.ConvolutionUtils; -import org.nd4j.autodiff.samediff.internal.DependencyList; -import org.nd4j.autodiff.samediff.internal.DependencyTracker; import org.nd4j.common.primitives.Counter; import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.learning.config.IUpdater; import com.google.common.collect.Lists; -import org.tensorflow.framework.NodeDef; import java.io.IOException; import java.util.*; @@ -444,7 +440,7 @@ public class KerasModel { } KerasInput kerasInput = (KerasInput) layer; - Layer layer1 = layersOrdered.get(kerasLayerIdx + 1).layer; + 
LayerConfiguration layer1 = layersOrdered.get(kerasLayerIdx + 1).layer; //no dim order, try to pull it from the next layer if there is one if(ConvolutionUtils.layerHasConvolutionLayout(layer1)) { CNN2DFormat formatForLayer = ConvolutionUtils.getFormatForLayer(layer1); @@ -491,7 +487,7 @@ public class KerasModel { && !this.className.equals(config.getFieldNameClassFunctional())) throw new InvalidKerasConfigurationException( "Keras model class name " + this.className + " incompatible with ComputationGraph"); - NeuralNetConfiguration.Builder modelBuilder = new NeuralNetConfiguration.Builder(); + NeuralNetConfiguration.NeuralNetConfigurationBuilder modelBuilder = NeuralNetConfiguration.builder(); if (optimizer != null) { modelBuilder.updater(optimizer); @@ -597,8 +593,8 @@ public class KerasModel { /* Whether to use standard backprop (or BPTT) or truncated BPTT. */ if (this.useTruncatedBPTT && this.truncatedBPTT > 0) - graphBuilder.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(truncatedBPTT) - .tBPTTBackwardLength(truncatedBPTT); + graphBuilder.backpropType(BackpropType.TruncatedBPTT).tbpttFwdLength(truncatedBPTT) + .tbpttBackLength(truncatedBPTT); else graphBuilder.backpropType(BackpropType.Standard); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModelImport.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModelImport.java index c9f3d15a0..850cdd7ad 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModelImport.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModelImport.java @@ -23,7 +23,7 @@ package org.deeplearning4j.nn.modelimport.keras; import lombok.extern.slf4j.Slf4j; import org.apache.commons.io.IOUtils; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; @@ -341,12 +341,12 @@ public class KerasModelImport { * @throws IOException IO exception * @see MultiLayerNetwork */ - public static MultiLayerConfiguration importKerasSequentialConfiguration(String modelJsonFilename, + public static NeuralNetConfiguration importKerasSequentialConfiguration(String modelJsonFilename, boolean enforceTrainingConfig) throws IOException, InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { KerasSequentialModel kerasModel = new KerasSequentialModel().modelBuilder().modelJsonFilename(modelJsonFilename) .enforceTrainingConfig(enforceTrainingConfig).buildSequential(); - return kerasModel.getMultiLayerConfiguration(); + return kerasModel.getNeuralNetConfiguration(); } /** @@ -358,11 +358,11 @@ public class KerasModelImport { * @throws IOException IO exception * @see MultiLayerNetwork */ - public static MultiLayerConfiguration importKerasSequentialConfiguration(String modelJsonFilename) + public static NeuralNetConfiguration importKerasSequentialConfiguration(String modelJsonFilename) throws IOException, InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { KerasSequentialModel kerasModel = new KerasSequentialModel().modelBuilder().modelJsonFilename(modelJsonFilename) 
.enforceTrainingConfig(false).buildSequential(); - return kerasModel.getMultiLayerConfiguration(); + return kerasModel.getNeuralNetConfiguration(); } private static File toTempFile(InputStream is) throws IOException { diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java index 696dc3df9..2a99d0c34 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.modelimport.keras; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.BackpropType; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; @@ -159,11 +158,11 @@ public class KerasSequentialModel extends KerasModel { } /** - * Configure a MultiLayerConfiguration from this Keras Sequential model configuration. + * Configure a NeuralNetConfiguration from this Keras Sequential model configuration. * - * @return MultiLayerConfiguration + * @return NeuralNetConfiguration */ - public MultiLayerConfiguration getMultiLayerConfiguration() + public NeuralNetConfiguration getNeuralNetConfiguration() throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { if (!this.className.equals(config.getFieldClassNameSequential())) throw new InvalidKerasConfigurationException( @@ -175,15 +174,15 @@ public class KerasSequentialModel extends KerasModel { throw new InvalidKerasConfigurationException( "MultiLayerNetwork expects only 1 output (found " + this.outputLayerNames.size() + ")"); - NeuralNetConfiguration.Builder modelBuilder = new NeuralNetConfiguration.Builder(); + NeuralNetConfiguration.NeuralNetConfigurationBuilder modelBuilder = NeuralNetConfiguration.builder(); if (optimizer != null) { modelBuilder.updater(optimizer); } - NeuralNetConfiguration.ListBuilder listBuilder = modelBuilder.list(); - //don't forcibly over ride for keras import - listBuilder.overrideNinUponBuild(false); + + //don't forcibly override for keras import + modelBuilder.overrideNinUponBuild(false); /* Add layers one at a time. 
*/ KerasLayer prevLayer = null; int layerIndex = 0; @@ -192,7 +191,7 @@ public class KerasSequentialModel extends KerasModel { int nbInbound = layer.getInboundLayerNames().size(); if (nbInbound != 1) throw new InvalidKerasConfigurationException( - "Layers in MultiLayerConfiguration must have exactly one inbound layer (found " + "Layers in NeuralNetConfiguration must have exactly one inbound layer (found " + nbInbound + " for layer " + layer.getLayerName() + ")"); if (prevLayer != null) { InputType[] inputTypes = new InputType[1]; @@ -201,39 +200,40 @@ public class KerasSequentialModel extends KerasModel { inputTypes[0] = this.outputTypes.get(prevLayer.getInboundLayerNames().get(0)); preprocessor = prevLayer.getInputPreprocessor(inputTypes); InputType outputType = preprocessor.getOutputType(inputTypes[0]); - layer.getLayer().setNIn(outputType,listBuilder.isOverrideNinUponBuild()); + layer.getLayer().setNIn(outputType,modelBuilder.isOverrideNinUponBuild()); } else { inputTypes[0] = this.outputTypes.get(prevLayer.getLayerName()); preprocessor = layer.getInputPreprocessor(inputTypes); if(preprocessor != null) { InputType outputType = preprocessor.getOutputType(inputTypes[0]); - layer.getLayer().setNIn(outputType,listBuilder.isOverrideNinUponBuild()); + layer.getLayer().setNIn(outputType,modelBuilder.isOverrideNinUponBuild()); } else - layer.getLayer().setNIn(inputTypes[0],listBuilder.isOverrideNinUponBuild()); + layer.getLayer().setNIn(inputTypes[0],modelBuilder.isOverrideNinUponBuild()); } if (preprocessor != null) - listBuilder.inputPreProcessor(layerIndex, preprocessor); + modelBuilder.inputPreProcessor(layerIndex, preprocessor); } - listBuilder.layer(layerIndex++, layer.getLayer()); + modelBuilder.layer(layerIndex++, layer.getLayer()); } else if (layer.getVertex() != null) - throw new InvalidKerasConfigurationException("Cannot add vertex to MultiLayerConfiguration (class name " + throw new InvalidKerasConfigurationException("Cannot add vertex to NeuralNetConfiguration (class name " + layer.getClassName() + ", layer name " + layer.getLayerName() + ")"); prevLayer = layer; } /* Whether to use standard backprop (or BPTT) or truncated BPTT. 
*/ if (this.useTruncatedBPTT && this.truncatedBPTT > 0) - listBuilder.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(truncatedBPTT) - .tBPTTBackwardLength(truncatedBPTT); + modelBuilder.backpropType(BackpropType.TruncatedBPTT) + .tbpttFwdLength(truncatedBPTT) + .tbpttBackLength(truncatedBPTT); else - listBuilder.backpropType(BackpropType.Standard); + modelBuilder.backpropType(BackpropType.Standard); - MultiLayerConfiguration build = listBuilder.build(); + NeuralNetConfiguration build = modelBuilder.build(); return build; @@ -256,7 +256,7 @@ public class KerasSequentialModel extends KerasModel { */ public MultiLayerNetwork getMultiLayerNetwork(boolean importWeights) throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { - MultiLayerNetwork model = new MultiLayerNetwork(getMultiLayerConfiguration()); + MultiLayerNetwork model = new MultiLayerNetwork(getNeuralNetConfiguration()); model.init(); if (importWeights) model = (MultiLayerNetwork) KerasModelUtils.copyWeightsToModel(model, this.layers); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayer.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayer.java index 8e30f72f2..11bb40d58 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayer.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayer.java @@ -26,9 +26,8 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; -import org.deeplearning4j.nn.modelimport.keras.layers.TFOpLayerImpl; import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; @@ -41,7 +40,7 @@ import java.util.List; import java.util.Map; -public class TFOpLayer extends Layer { +public class TFOpLayer extends LayerConfiguration { private final Map nodeDef; private final Map constants; @@ -90,7 +89,8 @@ public class TFOpLayer extends Layer { Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - TFOpLayerImpl tfOpLayerImpl = new TFOpLayerImpl(nodeDef, constants, conf, networkDataType); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + TFOpLayerImpl tfOpLayerImpl = new TFOpLayerImpl(nodeDef, constants, lconf, networkDataType); tfOpLayerImpl.setListeners(trainingListeners); tfOpLayerImpl.setIndex(layerIndex); return tfOpLayerImpl; diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayerImpl.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayerImpl.java index ba2b98db4..43ce8e985 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayerImpl.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/TFOpLayerImpl.java @@ -26,6 +26,7 @@ import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ArrayUtils; import 
org.deeplearning4j.common.config.DL4JClassLoading; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; @@ -56,7 +57,7 @@ public class TFOpLayerImpl extends AbstractLayer { private List inputNames; TFGraphRunnerService graphRunnerService; - public TFOpLayerImpl(Map nodeDef, Map constants, NeuralNetConfiguration conf, DataType dtype){ + public TFOpLayerImpl(Map nodeDef, Map constants, LayerConfiguration conf, DataType dtype){ super(conf, dtype); this.nodeDef = nodeDef; this.constants = constants; diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java index e1c6be765..97ceac993 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java @@ -216,7 +216,7 @@ public class KerasLSTM extends KerasLayer { * * @return LSTM ILayer */ - public Layer getLSTMLayer() { + public LayerConfiguration getLSTMLayer() { return layer; } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java index ea71fc8d7..35a1aed01 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java @@ -29,7 +29,7 @@ import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn; import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer; @@ -186,7 +186,7 @@ public class KerasSimpleRnn extends KerasLayer { * * @return SimpleRnn ILayer */ - public Layer getSimpleRnnLayer() { + public LayerConfiguration getSimpleRnnLayer() { return this.layer; } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java index ccbbbd9d6..3da1a4642 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java @@ -24,10 +24,9 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; import org.deeplearning4j.nn.conf.layers.LSTM; -import 
org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; -import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; @@ -146,7 +145,7 @@ public class KerasBidirectional extends KerasLayer { break; case "SimpleRNN": kerasRnnlayer = new KerasSimpleRnn(innerRnnConfig, enforceTrainingConfig, previousLayers); - Layer rnnLayer = ((KerasSimpleRnn) kerasRnnlayer).getSimpleRnnLayer(); + LayerConfiguration rnnLayer = ((KerasSimpleRnn) kerasRnnlayer).getSimpleRnnLayer(); this.layer = new Bidirectional(mode, rnnLayer); layer.setLayerName(layerName); break; @@ -162,7 +161,7 @@ public class KerasBidirectional extends KerasLayer { * * @return ILayer, recurrent layer */ - public Layer getUnderlyingRecurrentLayer() { + public LayerConfiguration getUnderlyingRecurrentLayer() { return kerasRnnlayer.getLayer(); } @@ -240,7 +239,7 @@ public class KerasBidirectional extends KerasLayer { } - private Map getUnderlyingWeights(Layer l, Map weights, String direction) + private Map getUnderlyingWeights(LayerConfiguration l, Map weights, String direction) throws InvalidKerasConfigurationException { int keras1SubstringLength; if (kerasRnnlayer instanceof KerasLSTM) @@ -269,7 +268,7 @@ public class KerasBidirectional extends KerasLayer { weights = newWeights; } - Layer layerBefore = kerasRnnlayer.getLayer(); + LayerConfiguration layerBefore = kerasRnnlayer.getLayer(); kerasRnnlayer.setLayer(l); kerasRnnlayer.setWeights(weights); Map ret = kerasRnnlayer.getWeights(); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java index 536afb915..883ff4dd7 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLayerUtils.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.modelimport.keras.utils; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.graph.ElementWiseVertex; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLambdaLayer; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; @@ -34,7 +34,6 @@ import org.deeplearning4j.nn.modelimport.keras.layers.KerasTFOpLayer; import org.deeplearning4j.nn.modelimport.keras.layers.advanced.activations.*; import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.*; import org.deeplearning4j.nn.modelimport.keras.layers.core.*; -import org.deeplearning4j.nn.modelimport.keras.layers.embeddings.Keras2DEmbedding; import org.deeplearning4j.nn.modelimport.keras.layers.embeddings.KerasEmbedding; import org.deeplearning4j.nn.modelimport.keras.layers.local.KerasLocallyConnected1D; import org.deeplearning4j.nn.modelimport.keras.layers.noise.KerasAlphaDropout; @@ -48,7 +47,6 @@ import 
org.deeplearning4j.nn.modelimport.keras.layers.pooling.KerasPooling3D; import org.deeplearning4j.nn.modelimport.keras.layers.recurrent.KerasLSTM; import org.deeplearning4j.nn.modelimport.keras.layers.recurrent.KerasSimpleRnn; import org.deeplearning4j.nn.modelimport.keras.layers.wrappers.KerasBidirectional; -import org.nd4j.common.primitives.Counter; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; @@ -154,7 +152,7 @@ public class KerasLayerUtils { * * @param layerConfig map containing Keras layer properties * @return KerasLayer - * @see Layer + * @see LayerConfiguration */ public static KerasLayer getKerasLayerFromConfig(Map layerConfig, KerasLayerConfiguration conf, @@ -174,7 +172,7 @@ public class KerasLayerUtils { * @param layerConfig map containing Keras layer properties * @param enforceTrainingConfig whether to enforce training-only configurations * @return KerasLayer - * @see Layer + * @see LayerConfiguration */ public static KerasLayer getKerasLayerFromConfig(Map layerConfig, boolean enforceTrainingConfig, diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java index 43f3b244f..969626676 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java @@ -24,7 +24,7 @@ package org.deeplearning4j.nn.modelimport.keras.utils; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.modelimport.keras.Hdf5Archive; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; @@ -55,7 +55,7 @@ public class KerasModelUtils { * @return DL4J Model interface * @throws InvalidKerasConfigurationException Invalid Keras config */ - public static Model copyWeightsToModel(Model model, Map kerasLayers) + public static IModel copyWeightsToModel(IModel model, Map kerasLayers) throws InvalidKerasConfigurationException { /* Get list if layers from model. */ Layer[] layersFromModel; @@ -67,7 +67,7 @@ public class KerasModelUtils { /* Iterate over layers in model, setting weights when relevant. 
*/ Set layerNames = new HashSet<>(kerasLayers.keySet()); for (org.deeplearning4j.nn.api.Layer layer : layersFromModel) { - String layerName = layer.conf().getLayer().getLayerName(); + String layerName = layer.getLayerConfiguration().getLayerName(); if (!kerasLayers.containsKey(layerName)) throw new InvalidKerasConfigurationException( "No weights found for layer in model (named " + layerName + ")"); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java index f50df5084..db0fc466b 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java @@ -48,7 +48,6 @@ import org.nd4j.common.resources.Resources; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.util.Arrays; import java.util.LinkedList; import java.util.List; @@ -88,7 +87,7 @@ public class FullModelComparisons extends BaseDL4JTest { // 1. ILayer LSTM firstLstm = (LSTM) model.getLayer(0); org.deeplearning4j.nn.conf.layers.LSTM firstConf = - (org.deeplearning4j.nn.conf.layers.LSTM) firstLstm.conf().getLayer(); + (org.deeplearning4j.nn.conf.layers.LSTM) firstLstm.getLayerConfiguration(); // "unit_forget_bias": true assertEquals(1.0, firstConf.getForgetGateBiasInit()); @@ -126,7 +125,7 @@ public class FullModelComparisons extends BaseDL4JTest { // 2. ILayer LSTM secondLstm = (LSTM) ((LastTimeStepLayer) model.getLayer(1)).getUnderlying(); org.deeplearning4j.nn.conf.layers.LSTM secondConf = - (org.deeplearning4j.nn.conf.layers.LSTM) secondLstm.conf().getLayer(); + (org.deeplearning4j.nn.conf.layers.LSTM) secondLstm.getLayerConfiguration(); // "unit_forget_bias": true assertEquals(1.0, secondConf.getForgetGateBiasInit()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java index fc48183e2..0a1bcb4a9 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.modelimport.keras.configurations; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.KerasModel; @@ -140,9 +140,9 @@ public class Keras1ModelConfigurationTest extends BaseDL4JTest { private void runSequentialConfigTest(String path, boolean training) throws Exception { try(InputStream is = Resources.asStream(path)) { - MultiLayerConfiguration config = + NeuralNetConfiguration config = new KerasModel().modelBuilder().modelJsonInputStream(is) - .enforceTrainingConfig(training).buildSequential().getMultiLayerConfiguration(); + 
.enforceTrainingConfig(training).buildSequential().getNeuralNetConfiguration(); MultiLayerNetwork model = new MultiLayerNetwork(config); model.init(); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java index 05f6162f3..9bb3e5b4a 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java @@ -23,7 +23,7 @@ package org.deeplearning4j.nn.modelimport.keras.configurations; import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; @@ -42,7 +42,6 @@ import org.nd4j.common.resources.Resources; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.util.Arrays; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertArrayEquals; @@ -260,9 +259,9 @@ public class Keras2ModelConfigurationTest extends BaseDL4JTest { @Test public void oneLstmLayerTest() throws Exception { try(InputStream is = Resources.asStream("/modelimport/keras/configs/keras2/one_lstm_no_sequences_tf_keras_2.json")) { - MultiLayerConfiguration config = + NeuralNetConfiguration config = new KerasModel().modelBuilder().modelJsonInputStream(is) - .enforceTrainingConfig(false).buildSequential().getMultiLayerConfiguration(); + .enforceTrainingConfig(false).buildSequential().getNeuralNetConfiguration(); MultiLayerNetwork model = new MultiLayerNetwork(config); model.init(); INDArray input = Nd4j.create(DataType.FLOAT, 50, 1500, 500); //NWC format - [Minibatch, seqLength, channels] @@ -287,9 +286,9 @@ public class Keras2ModelConfigurationTest extends BaseDL4JTest { private void runSequentialConfigTest(String path) throws Exception { try(InputStream is = Resources.asStream(path)) { - MultiLayerConfiguration config = + NeuralNetConfiguration config = new KerasModel().modelBuilder().modelJsonInputStream(is) - .enforceTrainingConfig(false).buildSequential().getMultiLayerConfiguration(); + .enforceTrainingConfig(false).buildSequential().getNeuralNetConfiguration(); MultiLayerNetwork model = new MultiLayerNetwork(config); model.init(); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java index c45b3c52b..20721371b 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java @@ -20,12 +20,13 @@ package org.deeplearning4j.nn.modelimport.keras.configurations; +import java.util.List; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; 
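With the renames carried through this patch, the sequential-configuration entry point returns a NeuralNetConfiguration directly instead of a MultiLayerConfiguration. A minimal usage sketch under that assumption (the JSON file name below is illustrative only, not part of the patch):

    NeuralNetConfiguration config =
            KerasModelImport.importKerasSequentialConfiguration("sequential_model.json", false);
    MultiLayerNetwork model = new MultiLayerNetwork(config);
    model.init();
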
import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; import org.deeplearning4j.nn.modelimport.keras.KerasModelImport; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; @@ -34,7 +35,6 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; import org.nd4j.common.resources.Resources; -import org.nd4j.linalg.convolution.Convolution; import org.nd4j.linalg.factory.Nd4j; import java.io.IOException; @@ -57,12 +57,12 @@ public class KerasModelImportTest extends BaseDL4JTest { @Test public void testNCHWNWHCChangeImport() { MultiLayerNetwork model = loadModel("modelimport/keras/weights/conv2dnchw/simpleconv2d.hdf5"); - MultiLayerConfiguration multiLayerConfiguration = model.getLayerWiseConfigurations(); - ConvolutionLayer convolutionLayer = (ConvolutionLayer) multiLayerConfiguration.getConf(0).getLayer(); + List layerConfigs = model.getConfiguration().getFlattenedLayerConfigurations(); + ConvolutionLayer convolutionLayer = (ConvolutionLayer) layerConfigs.get(0); assertEquals(CNN2DFormat.NCHW,convolutionLayer.getCnn2dDataFormat()); - SubsamplingLayer subsamplingLayer = (SubsamplingLayer) multiLayerConfiguration.getConf(1).getLayer(); + SubsamplingLayer subsamplingLayer = (SubsamplingLayer) layerConfigs.get(1); assertEquals(CNN2DFormat.NHWC,subsamplingLayer.getCnn2dDataFormat()); - ConvolutionLayer convolutionLayer1 = (ConvolutionLayer) multiLayerConfiguration.getConf(2).getLayer(); + ConvolutionLayer convolutionLayer1 = (ConvolutionLayer) layerConfigs.get(2); assertEquals(CNN2DFormat.NHWC,convolutionLayer1.getCnn2dDataFormat()); model.output(Nd4j.zeros(1,1,28,28)); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java index 67caf1e3b..f5b7584d3 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLayerTest.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.modelimport.keras.e2e; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.io.FileUtils; import org.deeplearning4j.common.resources.DL4JResources; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -61,7 +62,7 @@ public class KerasCustomLayerTest extends BaseDL4JTest { cachedKerasFile.deleteOnExit(); } - org.deeplearning4j.nn.api.Model importedModel = + IModel importedModel = KerasModelImport.importKerasModelAndWeights(cachedKerasFile.getAbsolutePath()); ModelSerializer.writeModel(importedModel, outputPath, false); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java index 9b6797c06..2fea0bb82 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java +++ 
b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java @@ -31,6 +31,7 @@ import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.LossLayer; import org.deeplearning4j.nn.conf.layers.RnnOutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -38,7 +39,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.Hdf5Archive; import org.deeplearning4j.nn.modelimport.keras.KerasModel; import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel; -import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelBuilder; import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -971,7 +971,7 @@ public class KerasModelEndToEndTest extends BaseDL4JTest { if (net.getOutputLayer() instanceof IOutputLayer) { netToTest = net; } else { - org.deeplearning4j.nn.conf.layers.Layer l; + LayerConfiguration l; if (labels.rank() == 2) { l = new LossLayer.Builder() .lossFunction(LossFunctions.LossFunction.MSE) @@ -1000,11 +1000,11 @@ public class KerasModelEndToEndTest extends BaseDL4JTest { for (Layer l : netToTest.getLayers()) { // Remove any dropout manually - until this is fixed: // https://github.com/eclipse/deeplearning4j/issues/4368 - l.conf().getLayer().setIDropout(null); + l.getLayerConfiguration().setIDropout(null); //Also swap out activation functions... this is a bit of a hack, but should make the net gradient checkable... 
- if (l.conf().getLayer() instanceof FeedForwardLayer) { - FeedForwardLayer ffl = (FeedForwardLayer) l.conf().getLayer(); + if (l.getLayerConfiguration() instanceof FeedForwardLayer) { + FeedForwardLayer ffl = (FeedForwardLayer) l.getLayerConfiguration(); IActivation activation = ffl.getActivationFn(); if (activation instanceof ActivationReLU || activation instanceof ActivationLReLU) { ffl.setActivationFn(new ActivationSoftPlus()); diff --git a/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java b/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java index 19681185c..1d7144b20 100644 --- a/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java +++ b/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java @@ -24,7 +24,6 @@ import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.models.paragraphvectors.ParagraphVectorsTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.EmbeddingLayer; @@ -53,7 +52,6 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.util.Collection; -import java.util.concurrent.Callable; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -189,7 +187,7 @@ public class Word2VecTestsSmall extends BaseDL4JTest { INDArray w = vec.lookupTable().getWeights(); System.out.println(w); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345).list() .layer(new EmbeddingLayer.Builder().weightInit(vec).build()) .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(w.size(1)).nOut(3).build()) @@ -210,7 +208,7 @@ public class Word2VecTestsSmall extends BaseDL4JTest { ByteArrayInputStream bais = new ByteArrayInputStream(bytes); final MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); - assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); + assertEquals(net.getConfiguration(), restored.getConfiguration()); assertTrue(net.params().equalsWithEps(restored.params(), 2e-3)); } } diff --git a/cavis-dnn/cavis-dnn-nn/build.gradle b/cavis-dnn/cavis-dnn-nn/build.gradle index 0e097093d..59ff712ab 100644 --- a/cavis-dnn/cavis-dnn-nn/build.gradle +++ b/cavis-dnn/cavis-dnn-nn/build.gradle @@ -58,5 +58,4 @@ dependencies { implementation "com.squareup.okhttp3:okhttp" implementation "com.squareup.okhttp3:logging-interceptor" } -sourceCompatibility = JavaVersion.VERSION_11 -targetCompatibility = JavaVersion.VERSION_11 + diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/Animal.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/Animal.java new file mode 100644 index 000000000..2b5ac714c --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/Animal.java @@ -0,0 +1,68 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. 
+ * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.api; + +public class Animal { + + private String animalString; + + protected Animal(AnimalBuilder b) { + this.animalString = b.animalString; + } + + public static AnimalBuilder builder() { + return new AnimalBuilderImpl(); + } + + public static abstract class AnimalBuilder> { + + private String animalString; + + public B animalString(String animalString) { + this.animalString = animalString; + return self(); + } + + protected abstract B self(); + + public abstract C build(); + + public String toString() { + return "Animal.AnimalBuilder(animalString=" + this.animalString + ")"; + } + } + + private static final class AnimalBuilderImpl extends + AnimalBuilder { + + private AnimalBuilderImpl() { + } + + protected AnimalBuilderImpl self() { + return this; + } + + public Animal build() { + return new Animal(this); + } + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IActivationFunction.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IActivationFunction.java new file mode 100644 index 000000000..18794b8fe --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IActivationFunction.java @@ -0,0 +1,57 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.api; + +/** + * Activation Function An activation function takes in weighted data (matrix multiplication between + * input data and weights) and outputs a non-linear transformation of the data. For example, output + * = max(0,weighted_data) is the rectified linear activation function (essentially set all negative + * values to zero). The difference between units and activation functions is that units can be more + * complex, that is, a unit can have multiple activation functions (for example LSTM units) or a + * slightly more complex structure (for example maxout units). + *

+ * The difference between linear and non-linear activation functions can be shown with the + * relationship of some weighted values: Imagine the four points A1, A2, B1 and B2. The pairs A1 / + * A2, and B1 / B2 lie close to each other, but A1 is distant from B1 and B2, and vice versa; the + * same for A2. + *

+ * With a linear transformation the relationship between pairs might change. For example A1 and A2 + * might be far apart, but this implies that B1 and B2 are also far apart. The distance between the + * pairs might shrink, but if it does, then both B1 and B2 will be close to A1 and A2 at the same + * time. We can apply many linear transformations, but the relationship between A1 / A2 and B1 / B2 + * will always be similar. + *

+ * In contrast, with a non-linear activation function we can increase the distance between A1 and A2 + * while we decrease the distance between B1 and B2. We can make B1 close to A1, but B2 distant from + * A1. By applying non-linear functions, we create new relationships between the points. With every + * new non-linear transformation we can increase the complexity of the relationships. In deep + * learning, using non-linear activation functions creates increasingly complex features with every + * layer. + *
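The distinction described in this comment can be made concrete with a few lines of ND4J; the snippet below is purely illustrative (arbitrary shapes, no API from this file), showing that stacked linear transformations collapse to a single one while an interposed non-linearity does not:

    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;
    import org.nd4j.linalg.ops.transforms.Transforms;

    INDArray x  = Nd4j.rand(1, 4);
    INDArray w1 = Nd4j.rand(4, 8);
    INDArray w2 = Nd4j.rand(8, 3);
    // two stacked linear transformations ...
    INDArray stacked   = x.mmul(w1).mmul(w2);
    // ... collapse to one linear transformation with the combined weight matrix
    INDArray collapsed = x.mmul(w1.mmul(w2));   // numerically equal to 'stacked'
    // a non-linearity between the two layers breaks this equivalence
    INDArray nonLinear = Transforms.relu(x.mmul(w1)).mmul(w2);
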

+ * In contrast, the features of 1000 layers of pure linear transformations can be reproduced by a + * single layer (because a chain of matrix multiplication can always be represented by a single + * matrix multiplication). This is why non-linear activation functions are so important in deep + * learning. + */ +public interface IActivationFunction { + +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java index f0c6a722a..2c31319fc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java @@ -21,66 +21,262 @@ package net.brutex.ai.dnn.api; +import java.util.Collection; +import java.util.Map; +import lombok.NonNull; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.optimize.api.ConvexOptimizer; +import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.common.primitives.Pair; +import org.nd4j.evaluation.IEvaluation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; +import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; /** * A Neural Network is an instance of a {@link INeuralNetworkConfiguration}, that can be trained, * evaluated, saved, exported, etc. Its configuration state is defined with the - * {@link #setConfiguration(INeuralNetworkConfiguration)} and {@link #getConfiguration()} methods. - * - */ + * {@link #setNetConfiguration(NeuralNetConfiguration)} (INeuralNetworkConfiguration)} and + * {@link #getNetConfiguration()} methods. + **/ + public interface IModel { /** - * The configuration that defines this Neural Network + * This method returns updater state (if applicable), null otherwise * - * @param conf the configuration to use for this network + * @return */ - void setConfiguration(INeuralNetworkConfiguration conf); - INeuralNetworkConfiguration getConfiguration(); + INDArray updaterState(); /** - * Fit the model for one iteration on the provided data + * This method returns Optimizer used for training * - * @param features the examples to classify (one example in each row) - * @param labels the example labels(a binary outcome matrix) - * @param featuresMask The mask array for the features (used for variable length time series, etc). May be null. - * @param labelsMask The mask array for the labels (used for variable length time series, etc). May be null. 
+ * @return */ - void fit(INDArray features, INDArray labels, INDArray featuresMask, INDArray labelsMask); + ConvexOptimizer getOptimizer(); /** * This method fits model with a given DataSet * - * @param dataSet the dataset to use for training + * @param dataSet */ void fit(DataSet dataSet); /** * This method fits model with a given MultiDataSet * - * @param dataSet the multi dataset to use for training + * @param dataSet */ void fit(MultiDataSet dataSet); /** - * The name of the Neural Network - * @return the name + * This method fits model with a given DataSetIterator + * + * @param iterator */ - String getName(); + void fit(DataSetIterator iterator); /** - * Set the name for this Neural Network - * @param name the name + * This method fits model with a given MultiDataSetIterator + * + * @param iterator */ - void setName(String name); + void fit(MultiDataSetIterator iterator); /** - * An implementation should provide a method to validate the network - * @return true if no errors found; false otherwise + * This method executes evaluation of the model against given iterator and evaluation + * implementations + * + * @param iterator */ - boolean isValid(); + T[] doEvaluation(DataSetIterator iterator, T... evaluations); + /** + * This method executes evaluation of the model against given iterator and evaluation + * implementations + * + * @param iterator + */ + T[] doEvaluation(MultiDataSetIterator iterator, T... evaluations); + + NeuralNetConfiguration getNetConfiguration(); + + void setNetConfiguration(@NonNull NeuralNetConfiguration netConfiguration); + + /** + * Init the model + */ + void init(); + + /** + * Get the number of parameters in this model + * @return number of parameters + */ + long numParams(); + + /** + * All models have a fit method + */ + @Deprecated + void fit(); + + /** + * Update layer weights and biases with gradient change + */ + void update(Gradient gradient); + + /** + * Perform one update applying the gradient + * + * @param gradient the gradient to apply + */ + void update(INDArray gradient, String paramType); + + + /** + * The score for the model + * + * @return the score for the model + */ + double score(); + + + /** + * Update the score + */ + void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr); + + /** + * Parameters of the model (if any) + * + * @return the parameters of the model + */ + INDArray params(); + + + /** + * the number of parameters for the model + * + * @return the number of parameters for the model + */ + long numParams(boolean backwards); + + /** + * Set the parameters for this model. This expects a linear ndarray which then be unpacked + * internally relative to the expected ordering of the model + * + * @param params the parameters for the model + */ + void setParams(INDArray params); + + /** + * Set the initial parameters array as a view of the full (backprop) network parameters NOTE: this + * is intended to be used internally in MultiLayerNetwork and ComputationGraph, not by users. + * + * @param params a 1 x nParams row vector that is a view of the larger (MLN/CG) parameters array + */ + void setParamsViewArray(INDArray params); + + + INDArray getGradientsViewArray(); + + /** + * Set the gradients array as a view of the full (backprop) network parameters NOTE: this is + * intended to be used internally in MultiLayerNetwork and ComputationGraph, not by users. 
+ * + * @param gradients a 1 x nParams row vector that is a view of the larger (MLN/CG) gradients + * array + */ + void setBackpropGradientsViewArray(INDArray gradients); + + /** + * Fit the model to the given data + * + * @param data the data to fit the model to + */ + void fit(INDArray data, LayerWorkspaceMgr workspaceMgr); + + + /** + * Get the gradient. Note that this method will not calculate the gradient, it will rather return + * the gradient that has been computed before. For calculating the gradient, see + * {@link IModel#computeGradientAndScore(LayerWorkspaceMgr)} } . + * + * @return the gradient for this model, as calculated before + */ + Gradient gradient(); + + /** + * Get the gradient and score + * + * @return the gradient and score + */ + Pair gradientAndScore(); + + /** + * The current inputs batch size + * + * @return the current inputs batch size + */ + int batchSize(); + + /** + * The input/feature matrix for the model + * + * @return the input/feature matrix for the model + */ + INDArray input(); + + /** + * Get a parameter array for a given parameter type key + * @param param the key of the parameter + * @return ndarray of parameters + */ + INDArray getParam(String param); + + + + /** + * Set the parameters for a given parameter type. + * @param key the param type key to set + * @param val the new parameters ndarray + */ + void setParam(String key, INDArray val); + + /** + * Clear input + */ + void clear(); + + + /** + * Apply any constraints to the model + */ + void applyConstraints(int iteration, int epoch); + + + void close(); + + /** + * Get the TrainingListeners + * @return training listener + */ + Collection getListeners(); + + /** + * Replace the TrainingListeners for this model + * @param listeners new listeners + */ + void setListeners(TrainingListener... listeners); + + /** + * Add TrainingListeners to the model + * @param listener listener to add + */ + void addListeners(TrainingListener... listener); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetwork.java deleted file mode 100644 index 48d6c561b..000000000 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetwork.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * - * ****************************************************************************** - * * - * * This program and the accompanying materials are made available under the - * * terms of the Apache License, Version 2.0 which is available at - * * https://www.apache.org/licenses/LICENSE-2.0. - * * - * * See the NOTICE file distributed with this work for additional - * * information regarding copyright ownership. - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * * License for the specific language governing permissions and limitations - * * under the License. 
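Taken together, the widened IModel interface above covers the full train/evaluate/inspect cycle. A minimal sketch of how a caller might drive it, assuming (as elsewhere in this patch) that MultiLayerNetwork implements IModel and that the configuration and iterator are built elsewhere:

    IModel model = new MultiLayerNetwork(conf);   // 'conf' is a NeuralNetConfiguration built elsewhere
    model.init();
    model.fit(trainIterator);                     // DataSetIterator-based training
    double score    = model.score();
    long nParams    = model.numParams();
    INDArray params = model.params();
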
- * * - * * SPDX-License-Identifier: Apache-2.0 - * ***************************************************************************** - * - */ - -package net.brutex.ai.dnn.api; - -import net.brutex.ai.dnn.conf.NeuralNetworkConfiguration; -import org.deeplearning4j.optimize.api.ConvexOptimizer; -import org.nd4j.evaluation.IEvaluation; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.api.DataSet; -import org.nd4j.linalg.dataset.api.MultiDataSet; -import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; - -/** - * @author raver119 - */ -public interface INeuralNetwork { - - /** - * This method does initialization of model - *

- * PLEASE NOTE: All implementations should track own state, to avoid double spending - */ - void init(); - - /** - * This method returns model parameters as single INDArray - * - * @return - */ - INDArray params(); - - /** - * This method returns updater state (if applicable), null otherwise - * - * @return - */ - INDArray updaterState(); - - /** - * This method returns Optimizer used for training - * - * @return - */ - ConvexOptimizer getOptimizer(); - - /** - * This method fits model with a given DataSet - * - * @param dataSet - */ - void fit(DataSet dataSet); - - /** - * This method fits model with a given MultiDataSet - * - * @param dataSet - */ - void fit(MultiDataSet dataSet); - - /** - * This method fits model with a given DataSetIterator - * - * @param iterator - */ - void fit(DataSetIterator iterator); - - /** - * This method fits model with a given MultiDataSetIterator - * - * @param iterator - */ - void fit(MultiDataSetIterator iterator); - - /** - * This method executes evaluation of the model against given iterator and evaluation - * implementations - * - * @param iterator - */ - T[] doEvaluation(DataSetIterator iterator, T... evaluations); - - /** - * This method executes evaluation of the model against given iterator and evaluation - * implementations - * - * @param iterator - */ - T[] doEvaluation(MultiDataSetIterator iterator, T... evaluations); - - /** - * A neural network is created from a configuration. - * @param conf the configuration to create the network from - */ - void setConfiguration(NeuralNetworkConfiguration conf); - - /** - * Return the configuration for this configuration - * @return - */ - NeuralNetworkConfiguration getConfiguration(); - -} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java index 81d447fa3..b317e4ab0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java @@ -21,10 +21,14 @@ package net.brutex.ai.dnn.api; +import java.io.Serializable; import java.util.List; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -public interface INeuralNetworkConfiguration { +public interface INeuralNetworkConfiguration extends Serializable, Cloneable { + INeuralNetworkConfiguration clone(); + void init(); } /** /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IUnit.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IUnit.java new file mode 100644 index 000000000..dd9643c6b --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IUnit.java @@ -0,0 +1,47 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. 
+ * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.api; + +/** + * Unit: A unit usually refers to the part of a layer through which the inputs are + * transformed by a nonlinear activation function (for example, the logistic sigmoid function). + * Typically, a unit has several incoming connections and several outgoing connections. However, units + * can also be more complex, like long short-term memory (LSTM) units, which have multiple + * activation functions with a distinct layout of connections among them, + * or maxout units, which compute the final output over an array of nonlinearly transformed input + * values. Pooling, convolution, and other input-transforming functions are usually not referred to + * as units. + *
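As an illustrative aside (not part of the IUnit interface added by this patch), a single unit can be sketched in plain Java as a weighted sum of its inputs passed through a nonlinear activation; the sigmoid used below is just one example of such a function, and the method and parameter names are hypothetical:

    // Hypothetical sketch of one sigmoid unit; names are illustrative only.
    static double unitOutput(double[] inputs, double[] weights, double bias) {
        double z = bias;
        for (int i = 0; i < inputs.length; i++) {
            z += weights[i] * inputs[i];   // weighted sum over the incoming connections
        }
        return 1.0 / (1.0 + Math.exp(-z)); // logistic sigmoid activation of the unit
    }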

+ * Artificial Neuron The term artificial neuron—or most often just neuron—is an equivalent term to + * unit, but implies a close connection to neurobiology and the human brain while deep learning has + * very little to do with the brain (for example, it is now thought that biological neurons are more + * similar to entire multilayer perceptrons rather than a single unit in a neural network). The term + * neuron was encouraged after the last AI winter to differentiate the more successful neural + * network from the failing and abandoned perceptron. However, since the wild successes of deep + * learning after 2012, the media often picked up on the term “neuron” and sought to explain deep + * learning as mimicry of the human brain, which is very misleading and potentially dangerous for + * the perception of the field of deep learning. Now the term neuron is discouraged and the more + * descriptive term unit should be used instead. + */ +public interface IUnit { + +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/LayerType.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/LayerType.java new file mode 100644 index 000000000..ba432d132 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/LayerType.java @@ -0,0 +1,52 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. 
+ * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.api; + +import lombok.Getter; +import org.deeplearning4j.nn.conf.layers.ActivationLayer; +import org.deeplearning4j.nn.conf.layers.BatchNormalization; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.DropoutLayer; +import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.conf.layers.NoParamLayer; + +public enum LayerType { + CONV("CONV", "Convolutional", ConvolutionLayer.class), + ACT("ACT", "Activation", ActivationLayer.class), + POOL( "POOL", "Pooling/ Subsampling", NoParamLayer.class), + FC( "FC", "Fully Connected", FeedForwardLayer.class), + BN("BN", "Batch Normalization", BatchNormalization.class), + DO("DO", "Dropout", DropoutLayer.class), + UNKNOWN("UNKNOWN", "Type not specified", LayerConfiguration.class); + +@Getter + String description; + @Getter String name; + @Getter Class clazz; + + LayerType(String name, String description, Class clazz) { + this.name = name; + this.description = description; + this.clazz = clazz; + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java new file mode 100644 index 000000000..3e13e811a --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java @@ -0,0 +1,42 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. 
+ * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.dnn.api; + +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; + +/** + * A fluent API to configure and create artificial neural networks + */ +public class NN { + + + public static NeuralNetConfigurationBuilder net() { + return NeuralNetConfiguration.builder(); + } + + void test() { + Dog.DogBuilder builder = Dog.builder() + .animalString("") + .dogString(""); + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/NeuralNetworkConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/NeuralNetworkConfiguration.java deleted file mode 100644 index 51de9f873..000000000 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/NeuralNetworkConfiguration.java +++ /dev/null @@ -1,705 +0,0 @@ -/* - * - * ****************************************************************************** - * * - * * This program and the accompanying materials are made available under the - * * terms of the Apache License, Version 2.0 which is available at - * * https://www.apache.org/licenses/LICENSE-2.0. - * * - * * See the NOTICE file distributed with this work for additional - * * information regarding copyright ownership. - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * * License for the specific language governing permissions and limitations - * * under the License. - * * - * * SPDX-License-Identifier: Apache-2.0 - * ***************************************************************************** - * - */ - -package net.brutex.ai.dnn.conf; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.exc.InvalidTypeIdException; -import com.fasterxml.jackson.databind.node.ArrayNode; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; -import lombok.Getter; -import lombok.NonNull; -import lombok.Setter; -import lombok.Singular; -import lombok.extern.jackson.Jacksonized; -import lombok.extern.slf4j.Slf4j; -import net.brutex.ai.dnn.api.ILayerConfiguration; -import net.brutex.ai.dnn.api.INeuralNetworkConfiguration; -import org.deeplearning4j.nn.conf.BackpropType; -import org.deeplearning4j.nn.conf.CacheMode; -import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.WorkspaceMode; -import org.deeplearning4j.nn.conf.distribution.Distribution; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.BaseLayer; -import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; -import org.deeplearning4j.nn.conf.layers.Layer; -import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; -import org.deeplearning4j.nn.conf.memory.MemoryReport; -import 
org.deeplearning4j.nn.conf.memory.NetworkMemoryReport; -import org.deeplearning4j.nn.conf.serde.JsonMappers; -import org.deeplearning4j.nn.weights.IWeightInit; -import org.deeplearning4j.nn.weights.WeightInit; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.IActivation; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.learning.config.IUpdater; -import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT; -import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; -import org.nd4j.linalg.lossfunctions.impl.LossMSE; -import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; - -/** - * The INeuralNetworkConfiguration is a sequential container for the different layers in your - * network (or other NeuralNetworkConfigurations). That said, NeuralNetworkConfigurations can be - * stacked.

- * It then “chains” outputs to inputs sequentially for each INeuralNetworkConfiguration, - * finally returning the output of the "top" configuration. Any settings made, are inherited and can - * be overridden on a "deeper" level. For this use case, you need to wrap the INeuralNetworkConfiguration - * into a BuildingBlockLayer - * - */ -@Jacksonized -@JsonIgnoreProperties(ignoreUnknown = true) -@lombok.Builder -@Slf4j -public class NeuralNetworkConfiguration extends NeuralNetConfiguration implements - INeuralNetworkConfiguration, Serializable, Cloneable { - - private static final int DEFAULT_TBPTT_LENGTH = 20; - @Getter protected final List confs = new ArrayList<>(); - /** - * hidden list of layers, that "flattens" all the layers of this network and applies - * inheritance. - */ - @lombok.Builder.ObtainVia(method = "calculateInnerLayers") - private final List innerLayerConfigurations; - @Getter @Setter @NonNull @Singular - protected List layers = new ArrayList<>(); - @Getter @Setter @NonNull @lombok.Builder.Default @Deprecated - protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.ENABLED; - @Getter @Setter @NonNull @lombok.Builder.Default @Deprecated - protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.ENABLED; - /** - * The type of backprop. Default setting is used for most networks (MLP, CNN etc), but - * optionally truncated BPTT can be used for training recurrent neural networks. If using - * TruncatedBPTT make sure you set both tBPTTForwardLength() and tBPTTBackwardLength() - */ - @Getter @Setter @NonNull @lombok.Builder.Default - protected BackpropType backpropType = BackpropType.Standard; - @Getter - protected Map inputPreProcessors = new HashMap<>(); - /** - * When doing truncated BPTT: how many steps of forward pass should we do before doing - * (truncated) backprop?
Only applicable when doing - * backpropType(BackpropType.TruncatedBPTT)
Typically the tBPTTForwardLength parameter is the same - * as the tBPTTBackwardLength parameter, but it may be larger in some circumstances (but - * never smaller)
Ideally your training data time series length should be divisible by this - * This is the k1 parameter on pg23 of - * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf - * - * @param forwardLength Forward length > 0, >= backwardLength - */ - @Getter @Setter protected int tbpttFwdLength = 20; - /** - * When doing truncated BPTT: how many steps of backward should we do?
Only applicable when - * doing backpropType(BackpropType.TruncatedBPTT)
This is the k2 parameter on pg23 of - * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf - * - * @param backwardLength <= forwardLength - */ - @Getter @Setter protected int tbpttBackLength = 20; - /** - * Creates and returns a copy of this object. - * - * @return a clone of this instance. - * @throws CloneNotSupportedException if the object's class does not support the {@code Cloneable} - * interface. Subclasses that override the {@code clone} method - * can also throw this exception to indicate that an instance - * cannot be cloned. - * @see Cloneable - */ - - //Nd4j.getRandom().setSeed(getConf(0).getSeed()); //TODO - //Counter for the number of parameter updates so far - // This is important for learning rate schedules, for example, and is stored here to ensure it is persisted - // for Spark and model serialization - @Getter @Setter - protected int iterationCount = 0; - //Counter for the number of epochs completed so far. Used for per-epoch schedules - @Getter @Setter - protected int epochCount = 0; - protected double dampingFactor = 100; - @Getter @Setter //todo why? - private Layer layer; - /** - * A seed for this network, will be random if not specified. - */ - @Getter @Setter @NonNull @lombok.Builder.Default - private long seed = new Random().nextLong(); - /** - * The default {@link CacheMode} for this configuration. Will be set to "NONE" if not specified otherwise. - * This method defines how/if preOutput cache is handled: NONE: cache disabled (default value) - * HOST: Host memory will be used DEVICE: GPU memory will be used (on CPU backends effect will - * be the same as for HOST) - * - * Valid values are
- * CacheMode.NONE,
- * CacheMode.HOST or
- * CacheMode.DEVICE
- * @param cacheMode - */ - @NonNull @Getter @Setter - @lombok.Builder.Default private CacheMode cacheMode = CacheMode.NONE; - /** - * The list of layer configurations in this configuration. They will be indexed automatically - * as the layers get added starting with index 0. - */ - @Singular @Getter - private List layerConfigurations; - /** - * The name for this configuration. Defaults to "Anonymous INeuralNetworkConfiguration" if - * it is not specified. - */ - @lombok.Builder.Default @Getter - private String name = "Anonymous INeuralNetworkConfiguration"; - /** - * The {@link InputType} of the data for this network configuration - */ - private InputType inputType; - /** - * Set the DataType for the network parameters and activations for all layers in the network. - * Default: Float - * - * @param dataType Datatype to use for parameters and activations - */ - @Getter @Setter @lombok.Builder.Default @NonNull - private DataType dataType = DataType.FLOAT; - /** - * Whether to override the nIn configuration forcibly upon construction. Default value is true. - * @return builder pattern - */ - @Getter @Setter - @lombok.Builder.Default - private boolean overrideNinUponBuild = true; - /** - * Enabled by default. If enabled, the output layer configuration will be validated, to throw an - * exception on likely invalid outputs - such as softmax + nOut=1, or LossMCXENT + Tanh.
If - * disabled (false) no output layer validation will be performed.
Disabling this validation - * is not recommended, as configurations that fail validation usually will not be able to - * learn correctly. However, the option to disable this validation is provided for advanced - * users when creating non-standard architectures. - * - * @param validate If true: validate output layer configuration. False: don't validate - */ - @Getter @Setter @lombok.Builder.Default - private boolean validateOutputLayerConfig=true; - /** - * Enabled by default. If enabled, an exception will be thrown when using the (invalid) - * combination of truncated backpropagation through time (TBPTT) with either a - * GlobalPoolingLayer or LastTimeStepLayer.
It is possible to disable this validation to - * allow what is almost certainly an invalid configuration to be used, however this is not - * recommended. - * - * @param validate Whether TBPTT validation should be performed - */ - @Getter @Setter @lombok.Builder.Default - private boolean validateTbpttConfig=true; - - - - /** - * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam} - * or {@link org.nd4j.linalg.learning.config.Nesterovs}
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param updater Updater to use - */ - @Getter @Setter @NonNull - private IUpdater updater; - - /** - * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping etc. - * See {@link GradientNormalization} for details
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param gradientNormalization Type of normalization to use. Defaults to None. - * @see GradientNormalization - */ - @Getter @Setter @NonNull @lombok.Builder.Default - private GradientNormalization gradientNormalization = GradientNormalization.None; - - /** - * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, - * GradientNormalization.ClipL2PerParamType, and GradientNormalization.ClipElementWiseAbsoluteValue
- * Not used otherwise.
- * L2 threshold for first two types of clipping, or absolute value threshold for last type of clipping.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - */ - @Getter @Setter - private double gradientNormalizationThreshold; - - - /** - * Weight initialization scheme to use, for initial weight values - * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - */ - @Getter @Setter - private IWeightInit weightInit; - - /** - * Activation function / neuron non-linearity
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - */ - @Getter @Setter - private IActivation activation; - - - - /** - * Create a neural net configuration from json - * - * @param json the neural net configuration from json - * @return {@link NeuralNetworkConfiguration} - */ - public static NeuralNetworkConfiguration fromJson(String json) { - NeuralNetworkConfiguration conf; - ObjectMapper mapper = NeuralNetworkConfiguration.mapper(); - try { - conf = mapper.readValue(json, NeuralNetworkConfiguration.class); - } catch (InvalidTypeIdException e) { - if (e.getMessage().contains("@class")) { - try { - //JSON may be legacy (1.0.0-alpha or earlier), attempt to load it using old format - return JsonMappers.getLegacyMapper().readValue(json, NeuralNetworkConfiguration.class); - } catch (InvalidTypeIdException e2) { - //Check for legacy custom layers: "Could not resolve type id 'CustomLayer' as a subtype of [simple type, class org.deeplearning4j.nn.conf.layers.ILayer]: known type ids = [Bidirectional, CenterLossOutputLayer, CnnLossLayer, ..." - //1.0.0-beta5: dropping support for custom layers defined in pre-1.0.0-beta format. Built-in layers from these formats still work - String msg = e2.getMessage(); - if (msg != null && msg.contains("Could not resolve type id")) { - throw new RuntimeException( - "Error deserializing MultiLayerConfiguration - configuration may have a custom " + - "layer, vertex or preprocessor, in pre version 1.0.0-beta JSON format.\nModels in legacy format with custom" - + - " layers should be loaded in 1.0.0-beta to 1.0.0-beta4 and saved again, before loading in the current version of DL4J", - e); - } - throw new RuntimeException(e2); - } catch (IOException e2) { - throw new RuntimeException(e2); - } - } - throw new RuntimeException(e); - } catch (IOException e) { - //Check if this exception came from legacy deserializer... - String msg = e.getMessage(); - if (msg != null && msg.contains("legacy")) { - throw new RuntimeException( - "Error deserializing MultiLayerConfiguration - configuration may have a custom " + - "layer, vertex or preprocessor, in pre version 1.0.0-alpha JSON format. These layers can be " - + - "deserialized by first registering them with NeuralNetConfiguration.registerLegacyCustomClassesForJSON(Class...)", - e); - } - throw new RuntimeException(e); - } - - //To maintain backward compatibility after loss function refactoring (configs generated with v0.5.0 or earlier) - // Previously: enumeration used for loss functions. 
Now: use classes - // IN the past, could have only been an OutputLayer or RnnOutputLayer using these enums - int layerCount = 0; - JsonNode confs = null; - for (NeuralNetworkConfiguration nnc : conf.getConfs()) { - Layer l = nnc.getLayer(); - if (l instanceof BaseOutputLayer && ((BaseOutputLayer) l).getLossFn() == null) { - //lossFn field null -> may be an old config format, with lossFunction field being for the enum - //if so, try walking the JSON graph to extract out the appropriate enum value - - BaseOutputLayer ol = (BaseOutputLayer) l; - try { - JsonNode jsonNode = mapper.readTree(json); - if (confs == null) { - confs = jsonNode.get("confs"); - } - if (confs instanceof ArrayNode) { - ArrayNode layerConfs = (ArrayNode) confs; - JsonNode outputLayerNNCNode = layerConfs.get(layerCount); - if (outputLayerNNCNode == null) { - throw new RuntimeException("should never happen"); //return conf; //Should never happen... - } - JsonNode outputLayerNode = outputLayerNNCNode.get("layer"); - - JsonNode lossFunctionNode = null; - if (outputLayerNode.has("output")) { - lossFunctionNode = outputLayerNode.get("output").get("lossFunction"); - } else if (outputLayerNode.has("rnnoutput")) { - lossFunctionNode = outputLayerNode.get("rnnoutput").get("lossFunction"); - } - - if (lossFunctionNode != null) { - String lossFunctionEnumStr = lossFunctionNode.asText(); - LossFunctions.LossFunction lossFunction = null; - try { - lossFunction = LossFunctions.LossFunction.valueOf(lossFunctionEnumStr); - } catch (Exception e) { - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON", - e); - } - - if (lossFunction != null) { - switch (lossFunction) { - case MSE: - ol.setLossFn(new LossMSE()); - break; - case XENT: - ol.setLossFn(new LossBinaryXENT()); - break; - case NEGATIVELOGLIKELIHOOD: - ol.setLossFn(new LossNegativeLogLikelihood()); - break; - case MCXENT: - ol.setLossFn(new LossMCXENT()); - break; - - //Remaining: TODO - case SQUARED_LOSS: - case RECONSTRUCTION_CROSSENTROPY: - default: - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not set loss function for {}", - lossFunction); - break; - } - } - } - - } else { - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON: layer 'confs' field is not an ArrayNode (is: {})", - (confs != null ? confs.getClass() : null)); - } - } catch (IOException e) { - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON", - e); - break; - } - } - - //Also, pre 0.7.2: activation functions were Strings ("activationFunction" field), not classes ("activationFn") - //Try to load the old format if necessary, and create the appropriate IActivation instance - if ((l instanceof BaseLayer) && ((BaseLayer) l).getActivationFn() == null) { - try { - JsonNode jsonNode = mapper.readTree(json); - if (confs == null) { - confs = jsonNode.get("confs"); - } - if (confs instanceof ArrayNode) { - ArrayNode layerConfs = (ArrayNode) confs; - JsonNode outputLayerNNCNode = layerConfs.get(layerCount); - if (outputLayerNNCNode == null) { - throw new RuntimeException("Should never happen"); //return conf; //Should never happen... 
- } - JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); - - if (layerWrapperNode == null || layerWrapperNode.size() != 1) { - continue; - } - - JsonNode layerNode = layerWrapperNode.elements().next(); - JsonNode activationFunction = layerNode.get( - "activationFunction"); //Should only have 1 element: "dense", "output", etc - - if (activationFunction != null) { - IActivation ia = Activation.fromString(activationFunction.asText()) - .getActivationFunction(); - ((BaseLayer) l).setActivationFn(ia); - } - } - - } catch (IOException e) { - log.warn( - "ILayer with null ActivationFn field or pre-0.7.2 activation function detected: could not parse JSON", - e); - } - } - - if (!handleLegacyWeightInitFromJson(json, l, mapper, confs, layerCount)) { - return conf; - } - - layerCount++; - } - return conf; - } - - /** - * Handle {@link WeightInit} and {@link Distribution} from legacy configs in Json format. Copied - * from handling of {@link Activation} above. - * - * @return True if all is well and layer iteration shall continue. False else-wise. - */ - private static boolean handleLegacyWeightInitFromJson(String json, Layer l, ObjectMapper mapper, - JsonNode confs, int layerCount) { - if ((l instanceof BaseLayer) && ((BaseLayer) l).getWeightInitFn() == null) { - try { - JsonNode jsonNode = mapper.readTree(json); - if (confs == null) { - confs = jsonNode.get("confs"); - } - if (confs instanceof ArrayNode) { - ArrayNode layerConfs = (ArrayNode) confs; - JsonNode outputLayerNNCNode = layerConfs.get(layerCount); - if (outputLayerNNCNode == null) { - return false; //Should never happen... - } - JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); - - if (layerWrapperNode == null || layerWrapperNode.size() != 1) { - return true; - } - - JsonNode layerNode = layerWrapperNode.elements().next(); - JsonNode weightInit = layerNode.get( - "weightInit"); //Should only have 1 element: "dense", "output", etc - JsonNode distribution = layerNode.get("dist"); - - Distribution dist = null; - if (distribution != null) { - dist = mapper.treeToValue(distribution, Distribution.class); - } - - if (weightInit != null) { - final IWeightInit wi = WeightInit.valueOf(weightInit.asText()) - .getWeightInitFunction(dist); - ((BaseLayer) l).setWeightInitFn(wi); - } - } - - } catch (IOException e) { - log.warn( - "ILayer with null WeightInit detected: " + l.getLayerName() + ", could not parse JSON", - e); - } - } - return true; - - } - - /** - * Object mapper for serialization of configurations - * - * @return - */ - public static ObjectMapper mapperYaml() { - return JsonMappers.getMapperYaml(); - } - - /** - * Object mapper for serialization of configurations - * - * @return - */ - public static ObjectMapper mapper() { - return JsonMappers.getMapper(); - } - - - - /** - * @return JSON representation of NN configuration - */ - public String toYaml() { - ObjectMapper mapper = NeuralNetConfiguration.mapperYaml(); - synchronized (mapper) { - try { - return mapper.writeValueAsString(this); - } catch (com.fasterxml.jackson.core.JsonProcessingException e) { - throw new RuntimeException(e); - } - } - } - - /** - * @return JSON representation of NN configuration - */ - public String toJson() { - ObjectMapper mapper = NeuralNetConfiguration.mapper(); - synchronized (mapper) { - //JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields occasionally - //when writeValueAsString is used by multiple threads. This results in invalid JSON. 
See issue #3243 - try { - return mapper.writeValueAsString(this); - } catch (com.fasterxml.jackson.core.JsonProcessingException e) { - throw new RuntimeException(e); - } - } - } - - @Override - public String toString() { - return toJson(); - } - - public NeuralNetworkConfiguration getConf(int i) { - return confs.get(i); - } - - @Override - public NeuralNetworkConfiguration clone() { - - NeuralNetworkConfiguration clone = (NeuralNetworkConfiguration) super.clone(); - List confList = clone.getConfs(); - if (confList != null) { - List list = new ArrayList<>(); - for (NeuralNetworkConfiguration conf : confList) { - list.add(conf.clone()); - } - } - - if (clone.getInputPreProcessors() != null) { - Map map = new HashMap<>(); - for (Map.Entry entry : clone.getInputPreProcessors().entrySet()) { - map.put(entry.getKey(), entry.getValue().clone()); - } - clone.getInputPreProcessors().clear(); - clone.getInputPreProcessors().putAll(map); - } - - clone.setInferenceWorkspaceMode(this.inferenceWorkspaceMode); - clone.setTrainingWorkspaceMode(this.trainingWorkspaceMode); - clone.setCacheMode(this.cacheMode); - clone.setValidateOutputLayerConfig(this.validateOutputLayerConfig); - clone.setDataType(this.dataType); - - return clone; - - } - - public InputPreProcessor getInputPreProcess(int curr) { - return inputPreProcessors.get(curr); - } - - /** - * Get a {@link MemoryReport} for the given MultiLayerConfiguration. This is used to estimate the - * memory requirements for the given network configuration and input - * - * @param inputType Input types for the network - * @return Memory report for the network - */ - public NetworkMemoryReport getMemoryReport(InputType inputType) { - - Map memoryReportMap = new LinkedHashMap<>(); - int nLayers = confs.size(); - for (int i = 0; i < nLayers; i++) { - String layerName = confs.get(i).getLayer().getLayerName(); - if (layerName == null) { - layerName = String.valueOf(i); - } - - //Pass input type through preprocessor, if necessary - InputPreProcessor preproc = getInputPreProcess(i); - //TODO memory requirements for preprocessor - if (preproc != null) { - inputType = preproc.getOutputType(inputType); - } - - LayerMemoryReport report = confs.get(i).getLayer().getMemoryReport(inputType); - memoryReportMap.put(layerName, report); - - inputType = confs.get(i).getLayer().getOutputType(i, inputType); - } - - return new NetworkMemoryReport(memoryReportMap, MultiLayerConfiguration.class, - "MultiLayerNetwork", inputType); - } - - /** - * For the given input shape/type for the network, return a list of activation sizes for each - * layer in the network.
i.e., list.get(i) is the output activation sizes for layer i - * - * @param inputType Input type for the network - * @return A lits of activation types for the network, indexed by layer number - */ - public List getLayerActivationTypes(@NonNull InputType inputType) { - List out = new ArrayList<>(); - int nLayers = confs.size(); - for (int i = 0; i < nLayers; i++) { - InputPreProcessor preproc = getInputPreProcess(i); - if (preproc != null) { - inputType = preproc.getOutputType(inputType); - } - - inputType = confs.get(i).getLayer().getOutputType(i, inputType); - out.add(inputType); - } - return out; - } - - /** - * Defines some additional handy methods. Other than that, - * the builder is generated by lombok. - */ - public static class NeuralNetworkConfigurationBuilder { - - /** - * Specify the processors. These are used at each layer for doing things like normalization and - * shaping of input. - * - * @param processor what to use to preProcess the data. - * @return builder pattern - */ - public NeuralNetworkConfigurationBuilder inputPreProcessor(Integer layer, - InputPreProcessor processor) { - inputPreProcessors.put(layer, processor); - return this; - } - - /** - * Specify additional layer configurations - */ - @Deprecated - public NeuralNetworkConfigurationBuilder layersFromArray(Layer[] arrLayers) { - for(Layer l : arrLayers) { - layers.add( l ); - } - return this; - } - } - - -} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/DenseLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/DenseLayerConfiguration.java deleted file mode 100644 index d472d99b2..000000000 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/DenseLayerConfiguration.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * - * ****************************************************************************** - * * - * * This program and the accompanying materials are made available under the - * * terms of the Apache License, Version 2.0 which is available at - * * https://www.apache.org/licenses/LICENSE-2.0. - * * - * * See the NOTICE file distributed with this work for additional - * * information regarding copyright ownership. - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * * License for the specific language governing permissions and limitations - * * under the License. - * * - * * SPDX-License-Identifier: Apache-2.0 - * ***************************************************************************** - * - */ - -package net.brutex.ai.dnn.conf.layer; - -import lombok.Builder; -import lombok.experimental.SuperBuilder; -import org.deeplearning4j.nn.conf.layers.LayerValidation; - -/** - * The dense layer is a neural network layer that is connected deeply, which means each neuron in - * the dense layer receives input from all neurons of its previous layer. The dense layer is found - * to be the most commonly used layer in the models. - *

- * In the background, the dense layer performs a matrix-vector multiplication. The values used in - * the matrix are actually parameters that can be trained and updated with the help of - * backpropagation. - *
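To make the matrix-vector view concrete (an illustrative sketch, not code from this patch), a dense layer's forward pass is essentially out = activation(W * in + b); the ReLU below stands in for whatever activation the layer is actually configured with, and all names are hypothetical:

    // Hypothetical dense-layer forward pass; W has shape [nOut][nIn], b has length nOut.
    static double[] denseForward(double[][] W, double[] b, double[] in) {
        double[] out = new double[b.length];
        for (int o = 0; o < b.length; o++) {
            double z = b[o];
            for (int i = 0; i < in.length; i++) {
                z += W[o][i] * in[i];      // matrix-vector multiplication with trainable weights
            }
            out[o] = Math.max(0.0, z);     // ReLU used purely as an example nonlinearity
        }
        return out;
    }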

- * The output generated by the dense layer is an ‘m’ dimensional vector. Thus, dense layer is - * basically used for changing the dimensions of the vector. Dense layers also applies operations - * like rotation, scaling, translation on the vector. - */ -@SuperBuilder -public class DenseLayerConfiguration extends FeedForwardLayerConfiguration { - - /** - * Decides whether we should include a bias vector for calculation purposes or not. - */ - @Builder.Default - boolean bias = true; - - - - /** - * An implementation to validate the network - * - * @return true if no errors found; false otherwise - */ - @Override - public boolean isValid() { - LayerValidation.assertNInNOutSet("DenseLayerConfiguration", getName(), -99, getIn(), getOut()); - return super.isValid(); - } -} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/FeedForwardLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/FeedForwardLayerConfiguration.java deleted file mode 100644 index c86869d54..000000000 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/FeedForwardLayerConfiguration.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * - * ****************************************************************************** - * * - * * This program and the accompanying materials are made available under the - * * terms of the Apache License, Version 2.0 which is available at - * * https://www.apache.org/licenses/LICENSE-2.0. - * * - * * See the NOTICE file distributed with this work for additional - * * information regarding copyright ownership. - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * * License for the specific language governing permissions and limitations - * * under the License. - * * - * * SPDX-License-Identifier: Apache-2.0 - * ***************************************************************************** - * - */ - -package net.brutex.ai.dnn.conf.layer; - -import lombok.Getter; -import lombok.experimental.SuperBuilder; -import lombok.extern.slf4j.Slf4j; -import net.brutex.ai.dnn.api.ILayer; -import net.brutex.ai.dnn.api.ILayerConfiguration; -import net.brutex.ai.dnn.api.IModel; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.inputs.InputType.Type; - -/** - * A Feed Forward Layer Configuration - */ -@Slf4j -@SuperBuilder -public class FeedForwardLayerConfiguration extends AbstractLayerConfiguration implements ILayerConfiguration { - - @Getter private int in; - @Getter private int out; - - /** - * This Fast Forward ILayer will always output data as - * FF type. - * @return InputType for FF - **/ - @Getter - final InputType.Type outputType = InputType.Type.FF; - - @Getter - final InputType.Type inputType = InputType.Type.FF; - - /** - * Create and return an instance of a ILayerConfiguration. 
- * - * @param network the "holding" network for the instance - * @return the new layer instance - */ - //@Override - public ILayer instantiate(IModel network) { - //Let's do some verifications first - if(getInputType() != Type.FF) { - log.error("The {} layer configuration must use an InputType of {}, but found {}", - this.getClass().getSimpleName(), - Type.FF.name(), - getInputType().name()); - } - return null; - } - - /** - * Number of trainable parameter in this layer - * - * @return number of parameter - */ - @Override - public long numParameters() { - return in * out + out; //add one extra out for the bias - } - - /** - * An implementation should provide a method to validate the network - * - * @return true if no errors found; false otherwise - */ - @Override - public boolean isValid() { - boolean result = true; - if(getInputType() != Type.FF) { - log.error("The {} layer configuration must use an InputType of {}, but found {}", - this.getClass().getSimpleName(), - Type.FF.name(), - getInputType().name()); - result = false; - } - return result; - } -} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java index 0a605b94f..2b900a5ff 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java @@ -21,10 +21,15 @@ package net.brutex.ai.dnn.networks; +import java.util.Map; import lombok.Getter; +import lombok.NonNull; import lombok.Setter; -import net.brutex.ai.dnn.conf.NeuralNetworkConfiguration; -import net.brutex.ai.dnn.api.INeuralNetwork; +import net.brutex.ai.dnn.api.IModel; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.gradient.Gradient; +import org.nd4j.linalg.api.ndarray.INDArray; + /** * Artificial Neural Network An artificial neural network (1) takes some input data, and (2) @@ -41,13 +46,50 @@ import net.brutex.ai.dnn.api.INeuralNetwork; * predictions of the network and the desired values and then using this error signal to change the * weights (or parameters) so that predictions get more accurate. */ -public abstract class ArtificialNeuralNetwork implements INeuralNetwork { +public abstract class ArtificialNeuralNetwork implements IModel { /** * A neural network is created from a configuration. + * * @param conf The (new net.brutex.ai) configuration for the network */ @Getter - @Setter //TODO make this also final and @NonNull - private NeuralNetworkConfiguration configuration; + @Setter + @NonNull + private NeuralNetConfiguration netConfiguration; + + + /** + * Create a new network from configuration + * + * @param conf the configuration + */ + public ArtificialNeuralNetwork(NeuralNetConfiguration conf) { + this.netConfiguration = conf; + } + + /** + * Update all parameters (for all parameter types) with the given gradient. + * + * @param gradient the gradients to add + */ + public void update(Gradient gradient) { + for (String paramType : gradient.gradientForVariable().keySet()) { + update(gradient.getGradientFor(paramType), paramType); + } + } + + /** + * Update the parameters of a given type with a given gradient. 
+ * + * @param gradient the gradient to apply + * @param paramType + */ + public void update(INDArray gradient, String paramType) { + setParam(paramType, getParam(paramType).addi(gradient)); + } + + + + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingConfiguration.java index 8f55745ed..d95c5aab6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingConfiguration.java @@ -22,12 +22,12 @@ package org.deeplearning4j.earlystopping; import lombok.Data; import lombok.NoArgsConstructor; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.saver.InMemoryModelSaver; import org.deeplearning4j.earlystopping.scorecalc.ScoreCalculator; import org.deeplearning4j.earlystopping.termination.EpochTerminationCondition; import org.deeplearning4j.earlystopping.termination.IterationTerminationCondition; import org.deeplearning4j.exception.DL4JInvalidConfigException; -import org.deeplearning4j.nn.api.Model; import org.nd4j.common.function.Supplier; import java.io.Serializable; @@ -37,7 +37,7 @@ import java.util.List; @Data @NoArgsConstructor -public class EarlyStoppingConfiguration implements Serializable { +public class EarlyStoppingConfiguration implements Serializable { private EarlyStoppingModelSaver modelSaver; private List epochTerminationConditions; @@ -89,7 +89,7 @@ public class EarlyStoppingConfiguration implements Serializable } - public static class Builder { + public static class Builder { private EarlyStoppingModelSaver modelSaver = new InMemoryModelSaver<>(); private List epochTerminationConditions = new ArrayList<>(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingModelSaver.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingModelSaver.java index a9793175a..9037e0792 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingModelSaver.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingModelSaver.java @@ -20,10 +20,10 @@ package org.deeplearning4j.earlystopping; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.saver.InMemoryModelSaver; import org.deeplearning4j.earlystopping.saver.LocalFileGraphSaver; import org.deeplearning4j.earlystopping.saver.LocalFileModelSaver; -import org.deeplearning4j.nn.api.Model; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; @@ -38,7 +38,7 @@ import java.io.Serializable; }) @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") -public interface EarlyStoppingModelSaver extends Serializable { +public interface EarlyStoppingModelSaver extends Serializable { /** Save the best model (so far) learned during early stopping training */ void saveBestModel(T net, double score) throws IOException; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingResult.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingResult.java index 6f44c7fdb..817f4c7db 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingResult.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingResult.java @@ -21,13 +21,13 @@ package org.deeplearning4j.earlystopping; import lombok.Data; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import java.io.Serializable; import java.util.Map; @Data -public class EarlyStoppingResult implements Serializable { +public class EarlyStoppingResult implements Serializable { public enum TerminationReason { Error, IterationTerminationCondition, EpochTerminationCondition } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/listener/EarlyStoppingListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/listener/EarlyStoppingListener.java index 191870de3..016b31881 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/listener/EarlyStoppingListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/listener/EarlyStoppingListener.java @@ -20,11 +20,11 @@ package org.deeplearning4j.earlystopping.listener; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.EarlyStoppingConfiguration; import org.deeplearning4j.earlystopping.EarlyStoppingResult; -import org.deeplearning4j.nn.api.Model; -public interface EarlyStoppingListener { +public interface EarlyStoppingListener { /**Method to be called when early stopping training is first started */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/InMemoryModelSaver.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/InMemoryModelSaver.java index 4e63ef0c5..b24b47651 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/InMemoryModelSaver.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/InMemoryModelSaver.java @@ -21,11 +21,11 @@ package org.deeplearning4j.earlystopping.saver; import org.deeplearning4j.earlystopping.EarlyStoppingModelSaver; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import java.io.IOException; -public class InMemoryModelSaver implements EarlyStoppingModelSaver { +public class InMemoryModelSaver implements EarlyStoppingModelSaver { private transient T bestModel; private transient T latestModel; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/AutoencoderScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/AutoencoderScoreCalculator.java index 0c70667dd..69f1785e4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/AutoencoderScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/AutoencoderScoreCalculator.java @@ -20,9 +20,9 @@ package org.deeplearning4j.earlystopping.scorecalc; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.scorecalc.base.BaseScoreCalculator; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.feedforward.autoencoder.AutoEncoder; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -32,7 +32,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import 
org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -public class AutoencoderScoreCalculator extends BaseScoreCalculator { +public class AutoencoderScoreCalculator extends BaseScoreCalculator { protected final Metric metric; protected RegressionEvaluation evaluation; @@ -48,7 +48,7 @@ public class AutoencoderScoreCalculator extends BaseScoreCalculator { } @Override - protected INDArray output(Model net, INDArray input, INDArray fMask, INDArray lMask) { + protected INDArray output(IModel net, INDArray input, INDArray fMask, INDArray lMask) { Layer l; if(net instanceof MultiLayerNetwork) { @@ -71,19 +71,19 @@ public class AutoencoderScoreCalculator extends BaseScoreCalculator { } @Override - protected INDArray[] output(Model network, INDArray[] input, INDArray[] fMask, INDArray[] lMask) { + protected INDArray[] output(IModel network, INDArray[] input, INDArray[] fMask, INDArray[] lMask) { return new INDArray[]{output(network, get0(input), get0(fMask), get0(lMask))}; } @Override - protected double scoreMinibatch(Model network, INDArray features, INDArray labels, INDArray fMask, + protected double scoreMinibatch(IModel network, INDArray features, INDArray labels, INDArray fMask, INDArray lMask, INDArray output) { evaluation.eval(features, output); return 0.0; //Not used } @Override - protected double scoreMinibatch(Model network, INDArray[] features, INDArray[] labels, INDArray[] fMask, INDArray[] lMask, INDArray[] output) { + protected double scoreMinibatch(IModel network, INDArray[] features, INDArray[] labels, INDArray[] fMask, INDArray[] lMask, INDArray[] output) { return scoreMinibatch(network, get0(features), get0(labels), get0(fMask), get0(lMask), get0(output)); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ClassificationScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ClassificationScoreCalculator.java index ae13edc79..b9884f68f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ClassificationScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ClassificationScoreCalculator.java @@ -20,13 +20,13 @@ package org.deeplearning4j.earlystopping.scorecalc; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.scorecalc.base.BaseIEvaluationScoreCalculator; -import org.deeplearning4j.nn.api.Model; import org.nd4j.evaluation.classification.Evaluation; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -public class ClassificationScoreCalculator extends BaseIEvaluationScoreCalculator { +public class ClassificationScoreCalculator extends BaseIEvaluationScoreCalculator { protected final Evaluation.Metric metric; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculator.java index e8d403a7f..2f6199449 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculator.java @@ -21,7 +21,7 @@ package org.deeplearning4j.earlystopping.scorecalc; import org.deeplearning4j.earlystopping.scorecalc.base.BaseScoreCalculator; -import org.deeplearning4j.nn.api.Model; +import 
net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.api.ndarray.INDArray; @@ -31,7 +31,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; import com.fasterxml.jackson.annotation.JsonProperty; -public class DataSetLossCalculator extends BaseScoreCalculator { +public class DataSetLossCalculator extends BaseScoreCalculator { @JsonProperty private boolean average; @@ -70,12 +70,12 @@ public class DataSetLossCalculator extends BaseScoreCalculator { } @Override - protected INDArray output(Model network, INDArray input, INDArray fMask, INDArray lMask) { + protected INDArray output(IModel network, INDArray input, INDArray fMask, INDArray lMask) { return output(network, arr(input), arr(fMask), arr(lMask))[0]; } @Override - protected INDArray[] output(Model network, INDArray[] input, INDArray[] fMask, INDArray[] lMask) { + protected INDArray[] output(IModel network, INDArray[] input, INDArray[] fMask, INDArray[] lMask) { if(network instanceof MultiLayerNetwork){ INDArray out = ((MultiLayerNetwork) network).output(input[0], false, get0(fMask), get0(lMask)); return new INDArray[]{out}; @@ -87,7 +87,7 @@ public class DataSetLossCalculator extends BaseScoreCalculator { } @Override - protected double scoreMinibatch(Model network, INDArray[] features, INDArray[] labels, INDArray[] fMask, INDArray[] lMask, INDArray[] output) { + protected double scoreMinibatch(IModel network, INDArray[] features, INDArray[] labels, INDArray[] fMask, INDArray[] lMask, INDArray[] output) { if(network instanceof MultiLayerNetwork){ return ((MultiLayerNetwork) network).score(new DataSet(get0(features), get0(labels), get0(fMask), get0(lMask)), false) * features[0].size(0); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ROCScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ROCScoreCalculator.java index 27fdbd8aa..ca3e5ab1c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ROCScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ROCScoreCalculator.java @@ -20,8 +20,8 @@ package org.deeplearning4j.earlystopping.scorecalc; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.scorecalc.base.BaseIEvaluationScoreCalculator; -import org.deeplearning4j.nn.api.Model; import org.nd4j.evaluation.IEvaluation; import org.nd4j.evaluation.classification.ROC; import org.nd4j.evaluation.classification.ROCBinary; @@ -29,7 +29,7 @@ import org.nd4j.evaluation.classification.ROCMultiClass; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -public class ROCScoreCalculator extends BaseIEvaluationScoreCalculator { +public class ROCScoreCalculator extends BaseIEvaluationScoreCalculator { public enum ROCType {ROC, BINARY, MULTICLASS} public enum Metric {AUC, AUPRC} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/RegressionScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/RegressionScoreCalculator.java index 5dab31e29..3ffd58a6a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/RegressionScoreCalculator.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/RegressionScoreCalculator.java @@ -20,13 +20,13 @@ package org.deeplearning4j.earlystopping.scorecalc; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.scorecalc.base.BaseIEvaluationScoreCalculator; -import org.deeplearning4j.nn.api.Model; import org.nd4j.evaluation.regression.RegressionEvaluation; import org.nd4j.evaluation.regression.RegressionEvaluation.Metric; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -public class RegressionScoreCalculator extends BaseIEvaluationScoreCalculator { +public class RegressionScoreCalculator extends BaseIEvaluationScoreCalculator { protected final Metric metric; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ScoreCalculator.java index 8e994a678..a9568d2d9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ScoreCalculator.java @@ -20,7 +20,7 @@ package org.deeplearning4j.earlystopping.scorecalc; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; @@ -34,7 +34,7 @@ import java.io.Serializable; @JsonSubTypes.Type(value = DataSetLossCalculatorCG.class, name = "MaxEpochsTerminationCondition"), }) -public interface ScoreCalculator extends Serializable { +public interface ScoreCalculator extends Serializable { /** Calculate the score for the given MultiLayerNetwork */ double calculateScore(T network); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconErrorScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconErrorScoreCalculator.java index 687eb9969..4b2f1eb9f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconErrorScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconErrorScoreCalculator.java @@ -20,9 +20,9 @@ package org.deeplearning4j.earlystopping.scorecalc; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.scorecalc.base.BaseScoreCalculator; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.variational.VariationalAutoencoder; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -32,7 +32,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -public class VAEReconErrorScoreCalculator extends BaseScoreCalculator { +public class VAEReconErrorScoreCalculator extends BaseScoreCalculator { protected final Metric metric; protected RegressionEvaluation evaluation; @@ -54,7 +54,7 @@ public class VAEReconErrorScoreCalculator extends BaseScoreCalculator { } @Override - protected INDArray output(Model net, INDArray input, INDArray fMask, INDArray lMask) { + protected INDArray output(IModel net, INDArray input, INDArray fMask, INDArray lMask) { Layer l; if(net instanceof 
MultiLayerNetwork) { MultiLayerNetwork network = (MultiLayerNetwork)net; @@ -74,19 +74,19 @@ public class VAEReconErrorScoreCalculator extends BaseScoreCalculator { } @Override - protected INDArray[] output(Model network, INDArray[] input, INDArray[] fMask, INDArray[] lMask) { + protected INDArray[] output(IModel network, INDArray[] input, INDArray[] fMask, INDArray[] lMask) { return new INDArray[]{output(network, get0(input), get0(fMask), get0(lMask))}; } @Override - protected double scoreMinibatch(Model network, INDArray features, INDArray labels, INDArray fMask, + protected double scoreMinibatch(IModel network, INDArray features, INDArray labels, INDArray fMask, INDArray lMask, INDArray output) { evaluation.eval(features, output); return 0.0; //Not used } @Override - protected double scoreMinibatch(Model network, INDArray[] features, INDArray[] labels, INDArray[] fMask, INDArray[] lMask, INDArray[] output) { + protected double scoreMinibatch(IModel network, INDArray[] features, INDArray[] labels, INDArray[] fMask, INDArray[] lMask, INDArray[] output) { return scoreMinibatch(network, get0(features), get0(labels), get0(fMask), get0(lMask), get0(output)); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconProbScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconProbScoreCalculator.java index 0ed2aef4b..0328d7e66 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconProbScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconProbScoreCalculator.java @@ -20,16 +20,16 @@ package org.deeplearning4j.earlystopping.scorecalc; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.scorecalc.base.BaseScoreCalculator; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.variational.VariationalAutoencoder; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -public class VAEReconProbScoreCalculator extends BaseScoreCalculator { +public class VAEReconProbScoreCalculator extends BaseScoreCalculator { protected final int reconstructionProbNumSamples; protected final boolean logProb; @@ -73,17 +73,17 @@ public class VAEReconProbScoreCalculator extends BaseScoreCalculator { } @Override - protected INDArray output(Model network, INDArray input, INDArray fMask, INDArray lMask) { + protected INDArray output(IModel network, INDArray input, INDArray fMask, INDArray lMask) { return null; //Not used } @Override - protected INDArray[] output(Model network, INDArray[] input, INDArray[] fMask, INDArray[] lMask) { + protected INDArray[] output(IModel network, INDArray[] input, INDArray[] fMask, INDArray[] lMask) { return null; //Not used } @Override - protected double scoreMinibatch(Model net, INDArray features, INDArray labels, INDArray fMask, + protected double scoreMinibatch(IModel net, INDArray features, INDArray labels, INDArray fMask, INDArray lMask, INDArray output) { Layer l; if(net instanceof MultiLayerNetwork) { @@ -108,7 +108,7 @@ public class VAEReconProbScoreCalculator extends BaseScoreCalculator { } @Override - protected double scoreMinibatch(Model network, INDArray[] features, INDArray[] labels, INDArray[] fMask, INDArray[] lMask, INDArray[] 
output) { + protected double scoreMinibatch(IModel network, INDArray[] features, INDArray[] labels, INDArray[] fMask, INDArray[] lMask, INDArray[] output) { return 0; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseIEvaluationScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseIEvaluationScoreCalculator.java index 89dd780dc..7a064c151 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseIEvaluationScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseIEvaluationScoreCalculator.java @@ -22,7 +22,7 @@ package org.deeplearning4j.earlystopping.scorecalc.base; import org.deeplearning4j.datasets.iterator.MultiDataSetWrapperIterator; import org.deeplearning4j.earlystopping.scorecalc.ScoreCalculator; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.evaluation.IEvaluation; @@ -30,7 +30,7 @@ import org.nd4j.linalg.dataset.adapter.MultiDataSetIteratorAdapter; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -public abstract class BaseIEvaluationScoreCalculator implements ScoreCalculator { +public abstract class BaseIEvaluationScoreCalculator implements ScoreCalculator { protected MultiDataSetIterator iterator; protected DataSetIterator iter; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseScoreCalculator.java index d0407b2e9..ce01ebfcd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseScoreCalculator.java @@ -22,14 +22,14 @@ package org.deeplearning4j.earlystopping.scorecalc.base; import lombok.NonNull; import org.deeplearning4j.earlystopping.scorecalc.ScoreCalculator; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -public abstract class BaseScoreCalculator implements ScoreCalculator { +public abstract class BaseScoreCalculator implements ScoreCalculator { protected MultiDataSetIterator mdsIterator; protected DataSetIterator iterator; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java index 4d6ff7675..db65ca7bb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java @@ -20,13 +20,13 @@ package org.deeplearning4j.earlystopping.trainer; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.EarlyStoppingConfiguration; import 
org.deeplearning4j.earlystopping.EarlyStoppingResult; import org.deeplearning4j.earlystopping.listener.EarlyStoppingListener; import org.deeplearning4j.earlystopping.scorecalc.ScoreCalculator; import org.deeplearning4j.earlystopping.termination.EpochTerminationCondition; import org.deeplearning4j.earlystopping.termination.IterationTerminationCondition; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.api.TrainingListener; @@ -47,7 +47,7 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; -public abstract class BaseEarlyStoppingTrainer implements IEarlyStoppingTrainer { +public abstract class BaseEarlyStoppingTrainer implements IEarlyStoppingTrainer { private static final Logger log = LoggerFactory.getLogger(BaseEarlyStoppingTrainer.class); @@ -337,7 +337,7 @@ public abstract class BaseEarlyStoppingTrainer implements IEarl } //Trigger epoch listener methods manually - these won't be triggered due to not calling fit(DataSetIterator) etc - protected void triggerEpochListeners(boolean epochStart, Model model, int epochNum){ + protected void triggerEpochListeners(boolean epochStart, IModel model, int epochNum){ Collection listeners; if(model instanceof MultiLayerNetwork){ MultiLayerNetwork n = ((MultiLayerNetwork) model); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/EarlyStoppingTrainer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/EarlyStoppingTrainer.java index f4df7a3d4..8c36c07d2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/EarlyStoppingTrainer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/EarlyStoppingTrainer.java @@ -25,8 +25,7 @@ import org.deeplearning4j.datasets.iterator.impl.SingletonDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.SingletonMultiDataSetIterator; import org.deeplearning4j.earlystopping.EarlyStoppingConfiguration; import org.deeplearning4j.earlystopping.listener.EarlyStoppingListener; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.graph.ComputationGraph; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; @@ -39,7 +38,7 @@ public class EarlyStoppingTrainer extends BaseEarlyStoppingTrainer earlyStoppingConfiguration, - MultiLayerConfiguration configuration, DataSetIterator train) { + NeuralNetConfiguration configuration, DataSetIterator train) { this(earlyStoppingConfiguration, new MultiLayerNetwork(configuration), train); net.init(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/IEarlyStoppingTrainer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/IEarlyStoppingTrainer.java index fd86168c6..718e10d0d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/IEarlyStoppingTrainer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/IEarlyStoppingTrainer.java @@ -20,11 +20,11 @@ package org.deeplearning4j.earlystopping.trainer; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.EarlyStoppingResult; import 
org.deeplearning4j.earlystopping.listener.EarlyStoppingListener; -import org.deeplearning4j.nn.api.Model; -public interface IEarlyStoppingTrainer { +public interface IEarlyStoppingTrainer { /** Conduct early stopping training */ EarlyStoppingResult fit(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java index 696e92bc2..d106f827f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java @@ -23,6 +23,7 @@ package org.deeplearning4j.gradientcheck; import lombok.*; import lombok.experimental.Accessors; import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.exception.ND4JArraySizeException; import org.nd4j.common.function.Consumer; @@ -83,7 +84,7 @@ public class GradientCheckUtil { if(outputLayer instanceof BaseOutputLayer){ BaseOutputLayer o = (BaseOutputLayer)outputLayer; lfn = ((org.deeplearning4j.nn.conf.layers.BaseOutputLayer)o.layerConf()).getLossFn(); - afn = o.layerConf().getActivationFn(); + afn = o.getLayerConfiguration().getActivationFn(); } else if(outputLayer instanceof LossLayer){ LossLayer o = (LossLayer) outputLayer; lfn = o.layerConf().getLossFn(); @@ -204,7 +205,7 @@ public class GradientCheckUtil { + "DataTypeUtil.setDTypeForContext(DataType.DOUBLE); before using GradientCheckUtil"); } - DataType netDataType = c.net.getLayerWiseConfigurations().getDataType(); + DataType netDataType = c.net.getNetConfiguration().getDataType(); if (netDataType != DataType.DOUBLE) { throw new IllegalStateException("Cannot perform gradient check: Network datatype is not set to double precision (" + "is: " + netDataType + "). Double precision must be used for gradient checks. Create network with .dataType(DataType.DOUBLE) before using GradientCheckUtil"); @@ -218,9 +219,9 @@ public class GradientCheckUtil { //Check network configuration: int layerCount = 0; - for (NeuralNetConfiguration n : c.net.getLayerWiseConfigurations().getConfs()) { - if (n.getLayer() instanceof BaseLayer) { - BaseLayer bl = (BaseLayer) n.getLayer(); + for (LayerConfiguration n : c.net.getNetConfiguration().getFlattenedLayerConfigurations()) { + if (n instanceof BaseLayer) { + BaseLayer bl = (BaseLayer) n; IUpdater u = bl.getIUpdater(); if (u instanceof Sgd) { //Must have LR of 1.0 @@ -228,7 +229,7 @@ public class GradientCheckUtil { if (lr != 1.0) { throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer " + layerCount + "; got " + u + " with lr=" + lr + " for layer \"" - + n.getLayer().getLayerName() + "\""); + + n.getLayerName() + "\""); } } else if (!(u instanceof NoOp)) { throw new IllegalStateException( @@ -238,7 +239,7 @@ public class GradientCheckUtil { IActivation activation = bl.getActivationFn(); if (activation != null) { if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) { - log.warn("Layer " + layerCount + " is possibly using an unsuitable activation function: " + log.warn("LayerConfiguration " + layerCount + " is possibly using an unsuitable activation function: " + activation.getClass() + ". 
Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)"); @@ -246,10 +247,10 @@ public class GradientCheckUtil { } } - if (n.getLayer().getIDropout() != null && c.callEachIter == null) { + if (n.getIDropout() != null && c.callEachIter == null) { throw new IllegalStateException("When gradient checking dropout, need to reset RNG seed each iter, or no" + " dropout should be present during gradient checks - got dropout = " - + n.getLayer().getIDropout() + " for layer " + layerCount); + + n.getIDropout() + " for layer " + layerCount); } } @@ -277,7 +278,7 @@ public class GradientCheckUtil { val nParams = originalParams.length(); - Map paramTable = c.net.paramTable(); + Map paramTable = c.net.getParamTable(); List paramNames = new ArrayList<>(paramTable.keySet()); val paramEnds = new long[paramNames.size()]; paramEnds[0] = paramTable.get(paramNames.get(0)).length(); @@ -306,8 +307,8 @@ public class GradientCheckUtil { if(c.print == PrintMode.ALL) { int i=0; for (Layer l : c.net.getLayers()) { - Set s = l.paramTable().keySet(); - log.info("Layer " + i + ": " + l.getClass().getSimpleName() + " - params " + s); + Set s = l.getParamTable().keySet(); + log.info("LayerConfiguration " + i + ": " + l.getClass().getSimpleName() + " - params " + s); i++; } } @@ -450,8 +451,8 @@ public class GradientCheckUtil { continue; LayerVertex lv = (LayerVertex) gv; - if (lv.getLayerConf().getLayer() instanceof BaseLayer) { - BaseLayer bl = (BaseLayer) lv.getLayerConf().getLayer(); + if (lv.getLayerConfiguration() instanceof BaseLayer) { + BaseLayer bl = (BaseLayer) lv.getLayerConfiguration(); IUpdater u = bl.getIUpdater(); if (u instanceof Sgd) { //Must have LR of 1.0 @@ -459,7 +460,7 @@ public class GradientCheckUtil { if (lr != 1.0) { throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer " + layerCount + "; got " + u + " with lr=" + lr + " for layer \"" - + lv.getLayerConf().getLayer().getLayerName() + "\""); + + lv.getLayerConfiguration().getLayerName() + "\""); } } else if (!(u instanceof NoOp)) { throw new IllegalStateException( @@ -469,7 +470,7 @@ public class GradientCheckUtil { IActivation activation = bl.getActivationFn(); if (activation != null) { if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) { - log.warn("Layer \"" + vertexName + "\" is possibly using an unsuitable activation function: " + log.warn("LayerConfiguration \"" + vertexName + "\" is possibly using an unsuitable activation function: " + activation.getClass() + ". 
Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)"); @@ -477,10 +478,10 @@ public class GradientCheckUtil { } } - if (lv.getLayerConf().getLayer().getIDropout() != null && c.callEachIter == null) { + if (lv.getLayerConfiguration().getIDropout() != null && c.callEachIter == null) { throw new IllegalStateException("When gradient checking dropout, rng seed must be reset each iteration, or no" + " dropout should be present during gradient checks - got dropout = " - + lv.getLayerConf().getLayer().getIDropout() + " for layer " + layerCount); + + lv.getLayerConfiguration().getIDropout() + " for layer " + layerCount); } } @@ -513,7 +514,7 @@ public class GradientCheckUtil { val nParams = originalParams.length(); - Map paramTable = c.net.paramTable(); + Map paramTable = c.net.getParamTable(); List paramNames = new ArrayList<>(paramTable.keySet()); val paramEnds = new long[paramNames.size()]; paramEnds[0] = paramTable.get(paramNames.get(0)).length(); @@ -646,7 +647,7 @@ public class GradientCheckUtil { val nParams = originalParams.length(); - Map paramTable = layer.paramTable(); + Map paramTable = layer.getParamTable(); List paramNames = new ArrayList<>(paramTable.keySet()); val paramEnds = new long[paramNames.size()]; paramEnds[0] = paramTable.get(paramNames.get(0)).length(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/adapters/YoloModelAdapter.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/adapters/YoloModelAdapter.java index 57ec18aa1..ea435af20 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/adapters/YoloModelAdapter.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/adapters/YoloModelAdapter.java @@ -24,7 +24,7 @@ import lombok.AllArgsConstructor; import lombok.Builder; import lombok.NoArgsConstructor; import lombok.val; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.ModelAdapter; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.objdetect.DetectedObject; @@ -43,7 +43,7 @@ public class YoloModelAdapter implements ModelAdapter> { @Builder.Default private double detectionThreshold = 0.5; @Override - public List apply(Model model, INDArray[] inputs, INDArray[] masks, INDArray[] labelsMasks) { + public List apply(IModel model, INDArray[] inputs, INDArray[] masks, INDArray[] labelsMasks) { if (model instanceof ComputationGraph) { val blindLayer = ((ComputationGraph) model).getOutputLayer(outputLayerIndex); if (blindLayer instanceof Yolo2OutputLayer) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/AbstractLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/AbstractParamInitializer.java similarity index 67% rename from cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/AbstractLayerConfiguration.java rename to cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/AbstractParamInitializer.java index 1ed923bda..d93c96448 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/conf/layer/AbstractLayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/AbstractParamInitializer.java @@ -19,17 +19,21 @@ * */ -package net.brutex.ai.dnn.conf.layer; +package org.deeplearning4j.nn.api; import lombok.Getter; -import lombok.NonNull; -import 
lombok.Setter; -import lombok.experimental.SuperBuilder; -import net.brutex.ai.dnn.api.ILayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -@SuperBuilder -public abstract class AbstractLayerConfiguration implements ILayerConfiguration { +public abstract class AbstractParamInitializer implements ParamInitializer { + + @Deprecated + public long numParams(NeuralNetConfiguration conf) { + long res = 0; + for(LayerConfiguration lc : conf.getFlattenedLayerConfigurations()) { + res += lc.initializer().numParams(lc); + } + return res; + } - @Getter @Setter @NonNull - private String name; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Classifier.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Classifier.java index 3643297d3..631f1bed4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Classifier.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Classifier.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.api; +import net.brutex.ai.dnn.api.IModel; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; @@ -27,7 +28,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import java.util.List; -public interface Classifier extends Model { +public interface Classifier extends IModel { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java index e7500055f..41051df53 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java @@ -21,7 +21,11 @@ package org.deeplearning4j.nn.api; +import lombok.NonNull; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.CacheMode; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.LayerHelper; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; @@ -42,7 +46,25 @@ import java.util.Collection; * * @see NVIDIA Deep Learning In A Nutshell */ -public interface Layer extends Serializable, Cloneable, Model, Trainable { +public interface Layer extends Serializable, Cloneable, Trainable, IModel { + + /** + * Return the configuration of this layer + * @return the configuration + */ + LayerConfiguration getLayerConfiguration(); + + /** + * Set a new layer configuration, new init() needs to be called afterwards. + * @param lconf layer configuration + */ + void setLayerConfiguration(LayerConfiguration lconf); + /** + * Convenient method to get the network configuration + * @return the configuration of the network this layer is part of + * + */ + NeuralNetConfiguration getNetConfiguration(); /** * This method sets given CacheMode for current layer @@ -107,23 +129,6 @@ public interface Layer extends Serializable, Cloneable, Model, Trainable { */ INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr mgr); - /** - * Get the iteration listeners for this layer. - */ - Collection getListeners(); - - /** - * Set the {@link TrainingListener}s for this model. If any listeners have previously been set, - * they will be replaced by this method - */ - void setListeners(TrainingListener... 
listeners); - - /** - * Set the {@link TrainingListener}s for this model. If any listeners have previously been set, - * they will be replaced by this method - */ - void setListeners(Collection listeners); - /** * Get the layer index. */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Model.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Model.java deleted file mode 100644 index 53107fdc5..000000000 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Model.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * ****************************************************************************** - * * - * * - * * This program and the accompanying materials are made available under the - * * terms of the Apache License, Version 2.0 which is available at - * * https://www.apache.org/licenses/LICENSE-2.0. - * * - * * See the NOTICE file distributed with this work for additional - * * information regarding copyright ownership. - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * * License for the specific language governing permissions and limitations - * * under the License. - * * - * * SPDX-License-Identifier: Apache-2.0 - * ***************************************************************************** - */ - -package org.deeplearning4j.nn.api; - -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.gradient.Gradient; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.optimize.api.ConvexOptimizer; -import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; - -import java.util.Collection; -import java.util.Map; - -public interface Model { - - /** - * Init the model - */ - void init(); - - - /** - * Set the trainingListeners for the ComputationGraph (and all layers in the network) - */ - void setListeners(Collection listeners); - - - /** - * Set the trainingListeners for the ComputationGraph (and all layers in the network) - */ - void setListeners(TrainingListener... listeners); - - /** - * This method ADDS additional TrainingListener to existing listeners - * - * @param listener - */ - void addListeners(TrainingListener... listener); - - - /** - * All models have a fit method - */ - @Deprecated - void fit(); - - /** - * Update layer weights and biases with gradient change - */ - void update(Gradient gradient); - - /** - * Perform one update applying the gradient - * @param gradient the gradient to apply - */ - void update(INDArray gradient, String paramType); - - - /** - * The score for the model - * @return the score for the model - */ - double score(); - - - /** - * Update the score - */ - void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr); - - /** - * Parameters of the model (if any) - * @return the parameters of the model - */ - INDArray params(); - - /** - * the number of parameters for the model - * @return the number of parameters for the model - * - */ - long numParams(); - - - /** - * the number of parameters for the model - * @return the number of parameters for the model - * - */ - long numParams(boolean backwards); - - /** - * Set the parameters for this model. 
- * This expects a linear ndarray which then be unpacked internally - * relative to the expected ordering of the model - * @param params the parameters for the model - */ - void setParams(INDArray params); - - /** - * Set the initial parameters array as a view of the full (backprop) network parameters - * NOTE: this is intended to be used internally in MultiLayerNetwork and ComputationGraph, not by users. - * @param params a 1 x nParams row vector that is a view of the larger (MLN/CG) parameters array - */ - void setParamsViewArray(INDArray params); - - - INDArray getGradientsViewArray(); - - /** - * Set the gradients array as a view of the full (backprop) network parameters - * NOTE: this is intended to be used internally in MultiLayerNetwork and ComputationGraph, not by users. - * @param gradients a 1 x nParams row vector that is a view of the larger (MLN/CG) gradients array - */ - void setBackpropGradientsViewArray(INDArray gradients); - - /** - * Fit the model to the given data - * @param data the data to fit the model to - */ - void fit(INDArray data, LayerWorkspaceMgr workspaceMgr); - - - /** - * Get the gradient. Note that this method will not calculate the gradient, it will rather return the gradient - * that has been computed before. - * For calculating the gradient, see {@link Model#computeGradientAndScore(LayerWorkspaceMgr)} } . - * @return the gradient for this model, as calculated before - */ - Gradient gradient(); - - /** - * Get the gradient and score - * @return the gradient and score - */ - Pair gradientAndScore(); - - /** - * The current inputs batch size - * @return the current inputs batch size - */ - int batchSize(); - - - /** - * The configuration for the neural network - * @return the configuration for the neural network - */ - NeuralNetConfiguration conf(); - - /** - * Setter for the configuration - * @param conf - */ - void setConf(NeuralNetConfiguration conf); - - /** - * The input/feature matrix for the model - * @return the input/feature matrix for the model - */ - INDArray input(); - - /** - * Returns this models optimizer - * @return this models optimizer - */ - ConvexOptimizer getOptimizer(); - - /** - * Get the parameter - * @param param the key of the parameter - * @return the parameter vector/matrix with that particular key - */ - INDArray getParam(String param); - - /** - * The param table - * @return - */ - Map paramTable(); - - /** - * Table of parameters by key, for backprop - * For many models (dense layers, etc) - all parameters are backprop parameters - * @param backpropParamsOnly If true, return backprop params only. 
If false: return all params (equivalent to - * paramsTable()) - */ - Map paramTable(boolean backpropParamsOnly); - - /** - * Setter for the param table - * @param paramTable - */ - void setParamTable(Map paramTable); - - - /** - * Set the parameter with a new ndarray - * @param key the key to se t - * @param val the new ndarray - */ - void setParam(String key, INDArray val); - - /** - * Clear input - */ - void clear(); - - - /** - * Apply any constraints to the model - */ - void applyConstraints(int iteration, int epoch); - - - void close(); -} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ModelAdapter.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ModelAdapter.java index 01a60b73e..1f87ea69b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ModelAdapter.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ModelAdapter.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.api; +import net.brutex.ai.dnn.api.IModel; import org.nd4j.adapters.OutputAdapter; import org.nd4j.linalg.api.ndarray.INDArray; @@ -28,5 +29,5 @@ public interface ModelAdapter extends OutputAdapter { * This method invokes model internally, and does conversion to T * @return */ - T apply(Model model, INDArray[] inputs, INDArray[] inputMasks, INDArray[] labelsMasks); + T apply(IModel model, INDArray[] inputs, INDArray[] inputMasks, INDArray[] labelsMasks); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java index 7b6483483..2505e05f8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java @@ -21,7 +21,7 @@ package org.deeplearning4j.nn.api; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import java.util.List; @@ -34,9 +34,9 @@ import java.util.Map; */ public interface ParamInitializer { - long numParams(NeuralNetConfiguration conf); - - long numParams(org.deeplearning4j.nn.conf.layers.Layer layer); + long numParams(LayerConfiguration layer); + @Deprecated + long numParams(NeuralNetConfiguration netConfiguration); /** * Get a list of all parameter keys given the layer configuration @@ -44,7 +44,7 @@ public interface ParamInitializer { * @param layer ILayer * @return All parameter keys */ - List paramKeys(org.deeplearning4j.nn.conf.layers.Layer layer); + List paramKeys(LayerConfiguration layer); /** * Weight parameter keys given the layer configuration @@ -52,7 +52,7 @@ public interface ParamInitializer { * @param layer ILayer * @return Weight parameter keys */ - List weightKeys(org.deeplearning4j.nn.conf.layers.Layer layer); + List weightKeys(LayerConfiguration layer); /** * Bias parameter keys given the layer configuration @@ -60,7 +60,7 @@ public interface ParamInitializer { * @param layer ILayer * @return Bias parameter keys */ - List biasKeys(org.deeplearning4j.nn.conf.layers.Layer layer); + List biasKeys(LayerConfiguration layer); /** * Is the specified parameter a weight? 
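For orientation, the reworked ParamInitializer contract above (LayerConfiguration arguments throughout, with the NeuralNetConfiguration overload pushed into AbstractParamInitializer as a deprecated default) can be implemented roughly as sketched below. This is a minimal illustrative sketch and not part of the patch: the class name NoParamInitializer is hypothetical, and the generic return types (List<String>, Map<String, INDArray>) are assumed, since the flattened hunks do not show them.

import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.deeplearning4j.nn.api.AbstractParamInitializer;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.nd4j.linalg.api.ndarray.INDArray;

/** Hypothetical initializer for a layer that owns no trainable parameters. */
public class NoParamInitializer extends AbstractParamInitializer {

    @Override
    public long numParams(LayerConfiguration layer) {
        return 0; // nothing to allocate in the flattened parameter view
    }

    @Override
    public List<String> paramKeys(LayerConfiguration layer) {
        return Collections.emptyList();
    }

    @Override
    public List<String> weightKeys(LayerConfiguration layer) {
        return Collections.emptyList();
    }

    @Override
    public List<String> biasKeys(LayerConfiguration layer) {
        return Collections.emptyList();
    }

    @Override
    public boolean isWeightParam(LayerConfiguration layer, String key) {
        return false;
    }

    @Override
    public boolean isBiasParam(LayerConfiguration layer, String key) {
        return false;
    }

    @Override
    public Map<String, INDArray> init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) {
        return Collections.emptyMap(); // no views to carve out of paramsView
    }

    @Override
    public Map<String, INDArray> getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) {
        return Collections.emptyMap();
    }
}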
@@ -69,7 +69,7 @@ public interface ParamInitializer { * @param key Key to check * @return True if parameter is a weight */ - boolean isWeightParam(Layer layer, String key); + boolean isWeightParam(LayerConfiguration layer, String key); /** * Is the specified parameter a bias? @@ -78,18 +78,18 @@ public interface ParamInitializer { * @param key Key to check * @return True if parameter is a bias */ - boolean isBiasParam(Layer layer, String key); + boolean isBiasParam(LayerConfiguration layer, String key); /** * Initialize the parameters * - * @param conf the configuration + * @param conf the configuration of the layer * @param paramsView a view of the full network (backprop) parameters * @param initializeParams if true: initialize the parameters according to the configuration. If false: don't modify the * values in the paramsView array (but do select out the appropriate subset, reshape etc as required) * @return Map of parameters keyed by type (view of the 'paramsView' array) */ - Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams); + Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams); /** * Return a map of gradients (in their standard non-flattened representation), taken from the flattened (row vector) gradientView array. @@ -100,6 +100,6 @@ public interface ParamInitializer { * @param gradientView The flattened gradients array, as a view of the larger array * @return A map containing an array by parameter type, that is a view of the full network gradients array */ - Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView); + Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Trainable.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Trainable.java index f93e1c5ee..33f87a736 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Trainable.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Trainable.java @@ -42,10 +42,29 @@ public interface Trainable { INDArray params(); /** - * @param backpropOnly If true: return only parameters that are not exclusively used for layerwise pretraining - * @return Parameter table + * The param table + * + * @return */ - Map paramTable(boolean backpropOnly); + Map getParamTable(); + + /** + * Table of parameters by key, for backprop. For many models (dense layers, etc) - all parameters + * are backprop parameters + * + * @param backpropParamsOnly If true, return backprop params only. If false: return all params + * (equivalent to paramsTable()) + */ + Map getParamTable(boolean backpropParamsOnly); + + /** + * Setter for the param table + * + * @param paramTable + */ + void setParamTable(Map paramTable); + + /** * DL4J layers typically produce the sum of the gradients during the backward pass for each layer, and if required diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java index a4f73d3b0..61c50b161 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java @@ -98,6 +98,4 @@ public interface RecurrentLayer extends Layer { * for standard BPTT. 
*/ Pair tbpttBackpropGradient(INDArray epsilon, int tbpttBackLength, LayerWorkspaceMgr workspaceMgr); - - } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java index 8fe4b99a3..afba61743 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java @@ -29,8 +29,7 @@ import org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer; -import org.deeplearning4j.nn.conf.layers.Layer; -import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffVertex; import org.deeplearning4j.nn.conf.memory.MemoryReport; @@ -140,7 +139,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @return JSON representation of computation graph configuration */ public String toJson() { - //As per MultiLayerConfiguration.toJson() + //As per NeuralNetConfiguration.toJson() ObjectMapper mapper = NeuralNetConfiguration.mapper(); synchronized (mapper) { //JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields occasionally @@ -160,7 +159,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @return {@link ComputationGraphConfiguration} */ public static ComputationGraphConfiguration fromJson(String json) { - //As per MultiLayerConfiguration.fromJson() + //As per NeuralNetConfiguration.fromJson() ObjectMapper mapper = NeuralNetConfiguration.mapper(); ComputationGraphConfiguration conf; try { @@ -171,7 +170,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { //JSON may be legacy (1.0.0-alpha or earlier), attempt to load it using old format return JsonMappers.getLegacyMapper().readValue(json, ComputationGraphConfiguration.class); } catch (InvalidTypeIdException e2){ - //Check for legacy custom layers: "Could not resolve type id 'CustomLayer' as a subtype of [simple type, class org.deeplearning4j.nn.conf.layers.Layer]: known type ids = [Bidirectional, CenterLossOutputLayer, CnnLossLayer, ..." + //Check for legacy custom layers: "Could not resolve type id 'CustomLayer' as a subtype of [simple type, class org.deeplearning4j.nn.conf.layers.LayerConfiguration]: known type ids = [Bidirectional, CenterLossOutputLayer, CnnLossLayer, ..." //1.0.0-beta5: dropping support for custom layers defined in pre-1.0.0-beta format. 
Built-in layers from these formats still work String msg = e2.getMessage(); if(msg != null && msg.contains("Could not resolve type id")){ @@ -207,8 +206,8 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { } LayerVertex lv = (LayerVertex) entry.getValue(); - if (lv.getLayerConf() != null && lv.getLayerConf().getLayer() != null) { - Layer layer = lv.getLayerConf().getLayer(); + if (lv.getNetConfiguration() != null && lv.getLayerConfiguration() != null) { + LayerConfiguration layer = lv.getLayerConfiguration(); if (layer instanceof BaseLayer && ((BaseLayer) layer).getActivationFn() == null) { String layerName = layer.getLayerName(); @@ -240,7 +239,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { } } catch (IOException e) { - log.warn("Layer with null ActivationFn field or pre-0.7.2 activation function detected: could not parse JSON", + log.warn("LayerConfiguration with null ActivationFn field or pre-0.7.2 activation function detected: could not parse JSON", e); } } @@ -257,7 +256,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * above. * @return True if all is well and layer iteration shall continue. False else-wise. */ - private static void handleLegacyWeightInitFromJson(String json, Layer layer, ObjectMapper mapper, JsonNode vertices) { + private static void handleLegacyWeightInitFromJson(String json, LayerConfiguration layer, ObjectMapper mapper, JsonNode vertices) { if (layer instanceof BaseLayer && ((BaseLayer) layer).getWeightInitFn() == null) { String layerName = layer.getLayerName(); @@ -294,7 +293,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { } } catch (IOException e) { - log.warn("Layer with null ActivationFn field or pre-0.7.2 activation function detected: could not parse JSON", + log.warn("LayerConfiguration with null ActivationFn field or pre-0.7.2 activation function detected: could not parse JSON", e); } } @@ -331,7 +330,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { conf.trainingWorkspaceMode = trainingWorkspaceMode; conf.inferenceWorkspaceMode = inferenceWorkspaceMode; conf.cacheMode = this.cacheMode; - conf.defaultConfiguration.cacheMode = this.cacheMode; + conf.defaultConfiguration.setCacheMode(this.cacheMode); conf.validateOutputLayerConfig = this.validateOutputLayerConfig; conf.dataType = this.dataType; @@ -517,7 +516,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { inputTypeList.add(layerInput); LayerVertex lv = (LayerVertex) gv; - Layer l = lv.getLayerConf().getLayer(); + LayerConfiguration l = lv.getLayerConfiguration(); //Preprocessors - add if necessary if (lv.getPreProcessor() == null && addPreprocIfNecessary) { @@ -710,7 +709,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { protected Map inputPreProcessors = new LinkedHashMap<>(); - protected NeuralNetConfiguration.Builder globalConfiguration; + protected NeuralNetConfiguration globalConfiguration; protected boolean allowDisconnected = false; protected boolean allowNoOutput = false; @@ -719,11 +718,11 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { protected String lastAdded = null; - public GraphBuilder(NeuralNetConfiguration.Builder globalConfiguration) { - this.globalConfiguration = globalConfiguration; + public GraphBuilder(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfiguration) { + this.globalConfiguration = 
globalConfiguration.build(); } - public GraphBuilder(ComputationGraphConfiguration newConf, NeuralNetConfiguration.Builder globalConfiguration) { + public GraphBuilder(ComputationGraphConfiguration newConf, NeuralNetConfiguration globalConfiguration) { ComputationGraphConfiguration clonedConf = newConf.clone(); @@ -742,7 +741,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { /** * Specify the processors for a given layer * These are used at each layer for doing things like normalization and shaping of input.
- * Note: preprocessors can also be defined using the {@link #addLayer(String, Layer, InputPreProcessor, String...)} method. + * Note: preprocessors can also be defined using the {@link #addLayer(String, LayerConfiguration, InputPreProcessor, String...)} method. * * @param layer the name of the layer that this preprocessor will be used with * @param processor the preprocessor to use for the specified layer @@ -776,7 +775,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * * @param forwardLength Forward length > 0, >= backwardLength */ - public GraphBuilder tBPTTForwardLength(int forwardLength) { + public GraphBuilder tbpttFwdLength(int forwardLength) { this.tbpttFwdLength = forwardLength; return this; } @@ -789,7 +788,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * * @param backwardLength <= forwardLength */ - public GraphBuilder tBPTTBackwardLength(int backwardLength) { + public GraphBuilder tbpttBackLength(int backwardLength) { this.tbpttBackLength = backwardLength; return this; } @@ -802,8 +801,8 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @param tbpttLength length > 0 */ public GraphBuilder tBPTTLength(int tbpttLength){ - tBPTTForwardLength(tbpttLength); - return tBPTTBackwardLength(tbpttLength); + tbpttFwdLength(tbpttLength); + return tbpttBackLength(tbpttLength); } /** @@ -813,9 +812,9 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @param layer The layer configuration * @param layerInputs Inputs to this layer. Inputs may be other layers, GraphVertex objects, * on a combination of the two. - * @see #addLayer(String, Layer, InputPreProcessor, String...) + * @see #addLayer(String, LayerConfiguration, InputPreProcessor, String...) */ - public GraphBuilder addLayer(String layerName, Layer layer, String... layerInputs) { + public GraphBuilder addLayer(String layerName, LayerConfiguration layer, String... layerInputs) { return addLayer(layerName, layer, null, layerInputs); } @@ -825,9 +824,9 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * * @param layerName Name/label of the layer to add * @param layer The layer configuration - * @see #addLayer(String, Layer, InputPreProcessor, String...) + * @see #addLayer(String, LayerConfiguration, InputPreProcessor, String...) */ - public GraphBuilder appendLayer(String layerName, Layer layer) { + public GraphBuilder appendLayer(String layerName, LayerConfiguration layer) { return appendLayer(layerName, layer, null); } @@ -838,9 +837,9 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @param layer The layer configuration * @param layerInputs Inputs to this layer. Inputs may be other layers, GraphVertex objects, * on a combination of the two. - * @see #addLayer(String, Layer, InputPreProcessor, String...) + * @see #addLayer(String, LayerConfiguration, InputPreProcessor, String...) */ - public GraphBuilder layer(int layerName, Layer layer, String... layerInputs) { + public GraphBuilder layer(int layerName, LayerConfiguration layer, String... layerInputs) { return addLayer(String.valueOf(layerName), layer, null, layerInputs); } @@ -851,9 +850,9 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @param layer The layer configuration * @param layerInputs Inputs to this layer. Inputs may be other layers, GraphVertex objects, * on a combination of the two. 
- * @see #addLayer(String, Layer, InputPreProcessor, String...) + * @see #addLayer(String, LayerConfiguration, InputPreProcessor, String...) */ - public GraphBuilder layer(String layerName, Layer layer, String... layerInputs) { + public GraphBuilder layer(String layerName, LayerConfiguration layer, String... layerInputs) { return addLayer(layerName, layer, null, layerInputs); } @@ -866,11 +865,11 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @param layerInputs Inputs to this layer. Inputs may be other layers, GraphVertex objects, * on a combination of the two. */ - public GraphBuilder addLayer(String layerName, Layer layer, InputPreProcessor preProcessor, + public GraphBuilder addLayer(String layerName, LayerConfiguration layer, InputPreProcessor preProcessor, String... layerInputs) { - NeuralNetConfiguration.Builder builder = globalConfiguration.clone(); - builder.layer(layer); - addVertex(layerName, new LayerVertex(builder.build(), preProcessor), layerInputs); + NeuralNetConfiguration conf = globalConfiguration.clone(); + conf.getLayerConfigurations().add(layer); + addVertex(layerName, new LayerVertex(conf, preProcessor), layerInputs); layer.setLayerName(layerName); return this; } @@ -883,7 +882,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @param layer The layer configuration * @param preProcessor The InputPreProcessor to use with this layer. */ - public GraphBuilder appendLayer(String layerName, Layer layer, InputPreProcessor preProcessor) { + public GraphBuilder appendLayer(String layerName, LayerConfiguration layer, InputPreProcessor preProcessor) { if(lastAdded == null){ throw new IllegalStateException("Can not use appendLayer with no previous layers"); @@ -902,7 +901,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @param layerInputs Inputs to this layer. Inputs may be other layers, GraphVertex objects, * on a combination of the two. */ - public GraphBuilder layer(String layerName, Layer layer, InputPreProcessor preProcessor, + public GraphBuilder layer(String layerName, LayerConfiguration layer, InputPreProcessor preProcessor, String... 
layerInputs) { return addLayer(layerName, layer, preProcessor, layerInputs); } @@ -1173,13 +1172,13 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { conf.vertices = this.vertices; conf.vertexInputs = this.vertexInputs; - conf.trainingWorkspaceMode = globalConfiguration.trainingWorkspaceMode; - conf.inferenceWorkspaceMode = globalConfiguration.inferenceWorkspaceMode; - conf.cacheMode = globalConfiguration.cacheMode; + conf.trainingWorkspaceMode = getGlobalConfiguration().getTrainingWorkspaceMode(); + conf.inferenceWorkspaceMode = getGlobalConfiguration().getInferenceWorkspaceMode(); + conf.cacheMode = globalConfiguration.getCacheMode(); conf.validateOutputLayerConfig = validateOutputConfig; - conf.dataType = globalConfiguration.dataType; + conf.dataType = globalConfiguration.getDataType(); - conf.defaultConfiguration = globalConfiguration.build(); + conf.defaultConfiguration = globalConfiguration; //Add preprocessors that were defined separately to the Layers to which they belong for (Map.Entry entry : inputPreProcessors.entrySet()) { @@ -1198,7 +1197,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { for (Map.Entry gv : vertices.entrySet()) { if (gv.getValue() instanceof LayerVertex) { LayerVertex lv = (LayerVertex) gv.getValue(); - Layer l = lv.getLayerConf().getLayer(); + LayerConfiguration l = lv.getLayerConfiguration(); } if (gv.getValue() instanceof SameDiffVertex) ((SameDiffVertex) gv.getValue()).applyGlobalConfig(globalConfiguration); @@ -1226,7 +1225,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { //Validate output layer configurations... for (Map.Entry e : conf.getVertices().entrySet()) { if (e.getValue() instanceof LayerVertex) { - Layer l = ((LayerVertex) e.getValue()).getLayerConf().getLayer(); + LayerConfiguration l = ((LayerVertex) e.getValue()).getLayerConfiguration(); OutputLayerUtil.validateOutputLayer(e.getKey(), l); //No-op for non output/loss layers } } @@ -1236,7 +1235,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { //Check for invalid combination - tbptt plus LastTimeStepLayer or for(Map.Entry e : vertices.entrySet()){ GraphVertex gv = e.getValue(); - Layer l = (gv instanceof LayerVertex ? ((LayerVertex)gv).getLayerConf().getLayer() : null); + LayerConfiguration l = (gv instanceof LayerVertex ? ((LayerVertex)gv).getLayerConfiguration() : null); if(gv instanceof LastTimeStepVertex || (l != null && (l instanceof LastTimeStep || l instanceof GlobalPoolingLayer))){ String s = (l == null ? gv.getClass().getName() : l.getClass().getName()); String n = e.getKey(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/MultiLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/MultiLayerConfiguration.java deleted file mode 100644 index 47baaebfd..000000000 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/MultiLayerConfiguration.java +++ /dev/null @@ -1,841 +0,0 @@ -/* - * ****************************************************************************** - * * - * * - * * This program and the accompanying materials are made available under the - * * terms of the Apache License, Version 2.0 which is available at - * * https://www.apache.org/licenses/LICENSE-2.0. - * * - * * See the NOTICE file distributed with this work for additional - * * information regarding copyright ownership. 
- * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * * License for the specific language governing permissions and limitations - * * under the License. - * * - * * SPDX-License-Identifier: Apache-2.0 - * ***************************************************************************** - */ - - -package org.deeplearning4j.nn.conf; - -import lombok.*; -import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.conf.distribution.Distribution; -import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.*; -import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; -import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; -import org.deeplearning4j.nn.conf.memory.MemoryReport; -import org.deeplearning4j.nn.conf.memory.NetworkMemoryReport; -import org.deeplearning4j.nn.conf.serde.JsonMappers; -import org.deeplearning4j.nn.weights.IWeightInit; -import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.util.OutputLayerUtil; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.IActivation; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT; -import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; -import org.nd4j.linalg.lossfunctions.impl.LossMSE; -import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.exc.InvalidTypeIdException; -import com.fasterxml.jackson.databind.node.ArrayNode; - -import java.io.IOException; -import java.io.Serializable; -import java.util.*; - -/** - * Deeplearning4j is a domain-specific language to configure deep neural networks, which are made of - * multiple layers. Everything starts with a MultiLayerConfiguration, which organizes those layers - * and their hyperparameters. Hyperparameters are variables that determine how a neural network - * learns. They include how many times to update the weights of the model, how to initialize those - * weights, which activation function to attach to the nodes, which optimization algorithm to use, - * and how fast the model should learn. This is what one configuration would look like: - *
- * - * MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
- * .weightInit(WeightInit.XAVIER) .activation(Activation.RELU)
- * .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- * .updater(new Sgd(0.05)) //... other hyperparameters
- * .list() .backprop(true)
- * .build();
- * - * With Deeplearning4j, you add a layer - * by calling layer on the NeuralNetConfiguration.Builder(), specifying its place in the order of - * layers (the zero-indexed layer below is the input layer), the number of input and output nodes, - * nIn and nOut, as well as the type: DenseLayer.
- * - * .layer(0, new DenseLayer.Builder().nIn(784).nOut(250)
- * .build())
- * - * Once you've configured your net, you train the - * model with model.fit. - */ -@Data -@AllArgsConstructor(access = AccessLevel.PRIVATE) -@NoArgsConstructor -@Slf4j -public class MultiLayerConfiguration implements Serializable, Cloneable { - - protected List confs; - protected Map inputPreProcessors = new HashMap<>(); - protected BackpropType backpropType = BackpropType.Standard; - protected int tbpttFwdLength = 20; - protected int tbpttBackLength = 20; - protected boolean validateOutputLayerConfig = true; //Default to legacy for pre 1.0.0-beta3 networks on deserialization - - @Getter - @Setter - protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.ENABLED; - - @Getter - @Setter - protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.ENABLED; - - @Getter - @Setter - protected CacheMode cacheMode; - - @Getter - @Setter - protected DataType dataType = DataType.FLOAT; //Default to float for deserialization of beta3 and earlier nets - - //Counter for the number of parameter updates so far - // This is important for learning rate schedules, for example, and is stored here to ensure it is persisted - // for Spark and model serialization - protected int iterationCount = 0; - - //Counter for the number of epochs completed so far. Used for per-epoch schedules - protected int epochCount = 0; - - /** - * Create a neural net configuration from json - * - * @param json the neural net configuration from json - * @return {@link MultiLayerConfiguration} - */ - public static MultiLayerConfiguration fromYaml(String json) { - ObjectMapper mapper = NeuralNetConfiguration.mapperYaml(); - try { - return mapper.readValue(json, MultiLayerConfiguration.class); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - /** - * Create a neural net configuration from json - * - * @param json the neural net configuration from json - * @return {@link MultiLayerConfiguration} - */ - public static MultiLayerConfiguration fromJson(String json) { - MultiLayerConfiguration conf; - ObjectMapper mapper = NeuralNetConfiguration.mapper(); - try { - conf = mapper.readValue(json, MultiLayerConfiguration.class); - } catch (InvalidTypeIdException e) { - if (e.getMessage().contains("@class")) { - try { - //JSON may be legacy (1.0.0-alpha or earlier), attempt to load it using old format - return JsonMappers.getLegacyMapper().readValue(json, MultiLayerConfiguration.class); - } catch (InvalidTypeIdException e2) { - //Check for legacy custom layers: "Could not resolve type id 'CustomLayer' as a subtype of [simple type, class org.deeplearning4j.nn.conf.layers.Layer]: known type ids = [Bidirectional, CenterLossOutputLayer, CnnLossLayer, ..." - //1.0.0-beta5: dropping support for custom layers defined in pre-1.0.0-beta format. Built-in layers from these formats still work - String msg = e2.getMessage(); - if (msg != null && msg.contains("Could not resolve type id")) { - throw new RuntimeException( - "Error deserializing MultiLayerConfiguration - configuration may have a custom " + - "layer, vertex or preprocessor, in pre version 1.0.0-beta JSON format.\nModels in legacy format with custom" - + - " layers should be loaded in 1.0.0-beta to 1.0.0-beta4 and saved again, before loading in the current version of DL4J", - e); - } - throw new RuntimeException(e2); - } catch (IOException e2) { - throw new RuntimeException(e2); - } - } - throw new RuntimeException(e); - } catch (IOException e) { - //Check if this exception came from legacy deserializer... 
- String msg = e.getMessage(); - if (msg != null && msg.contains("legacy")) { - throw new RuntimeException( - "Error deserializing MultiLayerConfiguration - configuration may have a custom " + - "layer, vertex or preprocessor, in pre version 1.0.0-alpha JSON format. These layers can be " - + - "deserialized by first registering them with NeuralNetConfiguration.registerLegacyCustomClassesForJSON(Class...)", - e); - } - throw new RuntimeException(e); - } - - //To maintain backward compatibility after loss function refactoring (configs generated with v0.5.0 or earlier) - // Previously: enumeration used for loss functions. Now: use classes - // IN the past, could have only been an OutputLayer or RnnOutputLayer using these enums - int layerCount = 0; - JsonNode confs = null; - for (NeuralNetConfiguration nnc : conf.getConfs()) { - Layer l = nnc.getLayer(); - if (l instanceof BaseOutputLayer && ((BaseOutputLayer) l).getLossFn() == null) { - //lossFn field null -> may be an old config format, with lossFunction field being for the enum - //if so, try walking the JSON graph to extract out the appropriate enum value - - BaseOutputLayer ol = (BaseOutputLayer) l; - try { - JsonNode jsonNode = mapper.readTree(json); - if (confs == null) { - confs = jsonNode.get("confs"); - } - if (confs instanceof ArrayNode) { - ArrayNode layerConfs = (ArrayNode) confs; - JsonNode outputLayerNNCNode = layerConfs.get(layerCount); - if (outputLayerNNCNode == null) { - return conf; //Should never happen... - } - JsonNode outputLayerNode = outputLayerNNCNode.get("layer"); - - JsonNode lossFunctionNode = null; - if (outputLayerNode.has("output")) { - lossFunctionNode = outputLayerNode.get("output").get("lossFunction"); - } else if (outputLayerNode.has("rnnoutput")) { - lossFunctionNode = outputLayerNode.get("rnnoutput").get("lossFunction"); - } - - if (lossFunctionNode != null) { - String lossFunctionEnumStr = lossFunctionNode.asText(); - LossFunctions.LossFunction lossFunction = null; - try { - lossFunction = LossFunctions.LossFunction.valueOf(lossFunctionEnumStr); - } catch (Exception e) { - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON", - e); - } - - if (lossFunction != null) { - switch (lossFunction) { - case MSE: - ol.setLossFn(new LossMSE()); - break; - case XENT: - ol.setLossFn(new LossBinaryXENT()); - break; - case NEGATIVELOGLIKELIHOOD: - ol.setLossFn(new LossNegativeLogLikelihood()); - break; - case MCXENT: - ol.setLossFn(new LossMCXENT()); - break; - - //Remaining: TODO - case SQUARED_LOSS: - case RECONSTRUCTION_CROSSENTROPY: - default: - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not set loss function for {}", - lossFunction); - break; - } - } - } - - } else { - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON: layer 'confs' field is not an ArrayNode (is: {})", - (confs != null ? 
confs.getClass() : null)); - } - } catch (IOException e) { - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON", - e); - break; - } - } - - //Also, pre 0.7.2: activation functions were Strings ("activationFunction" field), not classes ("activationFn") - //Try to load the old format if necessary, and create the appropriate IActivation instance - if ((l instanceof BaseLayer) && ((BaseLayer) l).getActivationFn() == null) { - try { - JsonNode jsonNode = mapper.readTree(json); - if (confs == null) { - confs = jsonNode.get("confs"); - } - if (confs instanceof ArrayNode) { - ArrayNode layerConfs = (ArrayNode) confs; - JsonNode outputLayerNNCNode = layerConfs.get(layerCount); - if (outputLayerNNCNode == null) { - return conf; //Should never happen... - } - JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); - - if (layerWrapperNode == null || layerWrapperNode.size() != 1) { - continue; - } - - JsonNode layerNode = layerWrapperNode.elements().next(); - JsonNode activationFunction = layerNode.get( - "activationFunction"); //Should only have 1 element: "dense", "output", etc - - if (activationFunction != null) { - IActivation ia = Activation.fromString(activationFunction.asText()) - .getActivationFunction(); - ((BaseLayer) l).setActivationFn(ia); - } - } - - } catch (IOException e) { - log.warn( - "Layer with null ActivationFn field or pre-0.7.2 activation function detected: could not parse JSON", - e); - } - } - - if (!handleLegacyWeightInitFromJson(json, l, mapper, confs, layerCount)) { - return conf; - } - - layerCount++; - } - return conf; - } - - /** - * Handle {@link WeightInit} and {@link Distribution} from legacy configs in Json format. Copied - * from handling of {@link Activation} above. - * - * @return True if all is well and layer iteration shall continue. False else-wise. - */ - private static boolean handleLegacyWeightInitFromJson(String json, Layer l, ObjectMapper mapper, - JsonNode confs, int layerCount) { - if ((l instanceof BaseLayer) && ((BaseLayer) l).getWeightInitFn() == null) { - try { - JsonNode jsonNode = mapper.readTree(json); - if (confs == null) { - confs = jsonNode.get("confs"); - } - if (confs instanceof ArrayNode) { - ArrayNode layerConfs = (ArrayNode) confs; - JsonNode outputLayerNNCNode = layerConfs.get(layerCount); - if (outputLayerNNCNode == null) { - return false; //Should never happen... 
- } - JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); - - if (layerWrapperNode == null || layerWrapperNode.size() != 1) { - return true; - } - - JsonNode layerNode = layerWrapperNode.elements().next(); - JsonNode weightInit = layerNode.get( - "weightInit"); //Should only have 1 element: "dense", "output", etc - JsonNode distribution = layerNode.get("dist"); - - Distribution dist = null; - if (distribution != null) { - dist = mapper.treeToValue(distribution, Distribution.class); - } - - if (weightInit != null) { - final IWeightInit wi = WeightInit.valueOf(weightInit.asText()) - .getWeightInitFunction(dist); - ((BaseLayer) l).setWeightInitFn(wi); - } - } - - } catch (IOException e) { - log.warn( - "Layer with null WeightInit detected: " + l.getLayerName() + ", could not parse JSON", - e); - } - } - return true; - - } - - public int getEpochCount() { - return epochCount; - } - - public void setEpochCount(int epochCount) { - this.epochCount = epochCount; - for (int i = 0; i < confs.size(); i++) { - getConf(i).setEpochCount(epochCount); - } - } - - /** - * @return JSON representation of NN configuration - */ - public String toYaml() { - ObjectMapper mapper = NeuralNetConfiguration.mapperYaml(); - synchronized (mapper) { - try { - return mapper.writeValueAsString(this); - } catch (com.fasterxml.jackson.core.JsonProcessingException e) { - throw new RuntimeException(e); - } - } - } - - /** - * @return JSON representation of NN configuration - */ - public String toJson() { - ObjectMapper mapper = NeuralNetConfiguration.mapper(); - synchronized (mapper) { - //JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields occasionally - //when writeValueAsString is used by multiple threads. This results in invalid JSON. See issue #3243 - try { - return mapper.writeValueAsString(this); - } catch (com.fasterxml.jackson.core.JsonProcessingException e) { - throw new RuntimeException(e); - } - } - } - - @Override - public String toString() { - return toJson(); - } - - public NeuralNetConfiguration getConf(int i) { - return confs.get(i); - } - - @Override - public MultiLayerConfiguration clone() { - try { - MultiLayerConfiguration clone = (MultiLayerConfiguration) super.clone(); - - if (clone.confs != null) { - List list = new ArrayList<>(); - for (NeuralNetConfiguration conf : clone.confs) { - list.add(conf.clone()); - } - clone.confs = list; - } - - if (clone.inputPreProcessors != null) { - Map map = new HashMap<>(); - for (Map.Entry entry : clone.inputPreProcessors.entrySet()) { - map.put(entry.getKey(), entry.getValue().clone()); - } - clone.inputPreProcessors = map; - } - - clone.inferenceWorkspaceMode = this.inferenceWorkspaceMode; - clone.trainingWorkspaceMode = this.trainingWorkspaceMode; - clone.cacheMode = this.cacheMode; - clone.validateOutputLayerConfig = this.validateOutputLayerConfig; - clone.dataType = this.dataType; - - return clone; - - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - } - - public InputPreProcessor getInputPreProcess(int curr) { - return inputPreProcessors.get(curr); - } - - /** - * Get a {@link MemoryReport} for the given MultiLayerConfiguration. 
This is used to estimate the - * memory requirements for the given network configuration and input - * - * @param inputType Input types for the network - * @return Memory report for the network - */ - public NetworkMemoryReport getMemoryReport(InputType inputType) { - - Map memoryReportMap = new LinkedHashMap<>(); - int nLayers = confs.size(); - for (int i = 0; i < nLayers; i++) { - String layerName = confs.get(i).getLayer().getLayerName(); - if (layerName == null) { - layerName = String.valueOf(i); - } - - //Pass input type through preprocessor, if necessary - InputPreProcessor preproc = getInputPreProcess(i); - //TODO memory requirements for preprocessor - if (preproc != null) { - inputType = preproc.getOutputType(inputType); - } - - LayerMemoryReport report = confs.get(i).getLayer().getMemoryReport(inputType); - memoryReportMap.put(layerName, report); - - inputType = confs.get(i).getLayer().getOutputType(i, inputType); - } - - return new NetworkMemoryReport(memoryReportMap, MultiLayerConfiguration.class, - "MultiLayerNetwork", inputType); - } - - /** - * For the given input shape/type for the network, return a list of activation sizes for each - * layer in the network.
i.e., list.get(i) is the output activation sizes for layer i - * - * @param inputType Input type for the network - * @return A lits of activation types for the network, indexed by layer number - */ - public List getLayerActivationTypes(@NonNull InputType inputType) { - List out = new ArrayList<>(); - int nLayers = confs.size(); - for (int i = 0; i < nLayers; i++) { - InputPreProcessor preproc = getInputPreProcess(i); - if (preproc != null) { - inputType = preproc.getOutputType(inputType); - } - - inputType = confs.get(i).getLayer().getOutputType(i, inputType); - out.add(inputType); - } - return out; - } - - @Data - public static class Builder { - - private static final int DEFAULT_TBPTT_LENGTH = 20; - - protected List confs = new ArrayList<>(); - protected double dampingFactor = 100; - protected Map inputPreProcessors = new HashMap<>(); - protected BackpropType backpropType = BackpropType.Standard; - protected int tbpttFwdLength = DEFAULT_TBPTT_LENGTH; - protected int tbpttBackLength = DEFAULT_TBPTT_LENGTH; - protected InputType inputType; - - protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.ENABLED; - protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.ENABLED; - protected CacheMode cacheMode = CacheMode.NONE; - protected boolean validateOutputConfig = true; - protected boolean validateTbpttConfig = true; - protected DataType dataType; - protected boolean overrideNinUponBuild = true; - - - /** - * Whether to over ride the nIn configuration forcibly upon construction. Default value is true - * - * @param overrideNinUponBuild Whether to over ride the nIn configuration forcibly upon - * construction. - * @return builder pattern - */ - public Builder overrideNinUponBuild(boolean overrideNinUponBuild) { - this.overrideNinUponBuild = overrideNinUponBuild; - return this; - } - - /** - * Specify the processors. These are used at each layer for doing things like normalization and - * shaping of input. - * - * @param processor what to use to preProcess the data. 
- * @return builder pattern - */ - public Builder inputPreProcessor(Integer layer, InputPreProcessor processor) { - inputPreProcessors.put(layer, processor); - return this; - } - - public Builder inputPreProcessor(String layer, InputPreProcessor processor) { - int i = 0; - for (NeuralNetConfiguration conf : this.confs) { - if (conf.getLayer().getLayerName().equals(layer)) { - inputPreProcessors.put(i, processor); - log.trace("Assigned preProcessor to layer with name {} at index {}", layer, i); - break; - } - i++; - } - if (i >= this.confs.size()) { - log.warn("Could not assign preprocessor to layer with name {} as layer was not found.", - layer); - } - return this; - } - - public Builder inputPreProcessors(Map processors) { - this.inputPreProcessors = processors; - return this; - } - - /** - * @deprecated Use {@link NeuralNetConfiguration.Builder#trainingWorkspaceMode(WorkspaceMode)} - */ - @Deprecated - public Builder trainingWorkspaceMode(@NonNull WorkspaceMode workspaceMode) { - this.trainingWorkspaceMode = workspaceMode; - return this; - } - - /** - * @deprecated Use {@link NeuralNetConfiguration.Builder#inferenceWorkspaceMode(WorkspaceMode)} - */ - @Deprecated - public Builder inferenceWorkspaceMode(@NonNull WorkspaceMode workspaceMode) { - this.inferenceWorkspaceMode = workspaceMode; - return this; - } - - /** - * This method defines how/if preOutput cache is handled: NONE: cache disabled (default value) - * HOST: Host memory will be used DEVICE: GPU memory will be used (on CPU backends effect will - * be the same as for HOST) - * - * @param cacheMode - * @return - */ - public Builder cacheMode(@NonNull CacheMode cacheMode) { - this.cacheMode = cacheMode; - return this; - } - - /** - * The type of backprop. Default setting is used for most networks (MLP, CNN etc), but - * optionally truncated BPTT can be used for training recurrent neural networks. If using - * TruncatedBPTT make sure you set both tBPTTForwardLength() and tBPTTBackwardLength() - */ - public Builder backpropType(@NonNull BackpropType type) { - this.backpropType = type; - return this; - } - - /** - * When doing truncated BPTT: how many steps should we do?
Only applicable when doing - * backpropType(BackpropType.TruncatedBPTT)
See: http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf - * - * @param bpttLength length > 0 - */ - public Builder tBPTTLength(int bpttLength) { - tBPTTForwardLength(bpttLength); - return tBPTTBackwardLength(bpttLength); - } - - /** - * When doing truncated BPTT: how many steps of forward pass should we do before doing - * (truncated) backprop?
Only applicable when doing - * backpropType(BackpropType.TruncatedBPTT)
Typically tBPTTForwardLength parameter is same - * as the tBPTTBackwardLength parameter, but may be larger than it in some circumstances (but - * never smaller)
Ideally your training data time series length should be divisible by this - * This is the k1 parameter on pg23 of - * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf - * - * @param forwardLength Forward length > 0, >= backwardLength - */ - public Builder tBPTTForwardLength(int forwardLength) { - this.tbpttFwdLength = forwardLength; - return this; - } - - /** - * When doing truncated BPTT: how many steps of backward should we do?
Only applicable when - * doing backpropType(BackpropType.TruncatedBPTT)
This is the k2 parameter on pg23 of - * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf - * - * @param backwardLength <= forwardLength - */ - public Builder tBPTTBackwardLength(int backwardLength) { - this.tbpttBackLength = backwardLength; - return this; - } - - public Builder confs(List confs) { - this.confs = confs; - return this; - } - - public Builder setInputType(InputType inputType) { - this.inputType = inputType; - return this; - } - - /** - * Enabled by default. If enabled, the output layer configuration will be validated, to throw an - * exception on likely invalid outputs - such as softmax + nOut=1, or LossMCXENT + Tanh.
If - * disabled (false) no output layer validation will be performed.
Disabling this validation - * is not recommended, as the configurations that fail validation usually will not be able to - * learn correctly. However, the option to disable this validation is provided for advanced - * users when creating non-standard architectures. - * - * @param validate If true: validate output layer configuration. False: don't validate - */ - public Builder validateOutputLayerConfig(boolean validate) { - this.validateOutputConfig = validate; - return this; - } - - /** - * Enabled by default. If enabled, an exception will be throw when using the (invalid) - * combination of truncated backpropagation through time (TBPTT) with either a - * GlobalPoolingLayer or LastTimeStepLayer.
It is possible to disable this validation to - * allow what is almost certainly an invalid configuration to be used, however this is not - * recommended. - * - * @param validate Whether TBPTT validation should be performed - */ - public Builder validateTbpttConfig(boolean validate) { - this.validateTbpttConfig = validate; - return this; - } - - /** - * Set the DataType for the network parameters and activations for all layers in the network. - * Default: Float - * - * @param dataType Datatype to use for parameters and activations - */ - public Builder dataType(@NonNull DataType dataType) { - this.dataType = dataType; - return this; - } - - - public MultiLayerConfiguration build() { - //Validate BackpropType setting - if ((tbpttBackLength != DEFAULT_TBPTT_LENGTH || tbpttFwdLength != DEFAULT_TBPTT_LENGTH) - && backpropType != BackpropType.TruncatedBPTT) { - log.warn("Truncated backpropagation through time lengths have been configured with values " - + tbpttFwdLength - + " and " + tbpttBackLength + " but backprop type is set to " + backpropType - + ". TBPTT configuration" + - " settings will only take effect if backprop type is set to BackpropType.TruncatedBPTT"); - } - - if (backpropType == BackpropType.TruncatedBPTT && validateTbpttConfig) { - //Check for invalid combination - tbptt plus LastTimeStepLayer or - for (int i = 0; i < confs.size(); i++) { - Layer l = confs.get(i).getLayer(); - if (l instanceof LastTimeStep || l instanceof GlobalPoolingLayer) { - throw new IllegalStateException( - "Invalid network configuration detected: Truncated backpropagation through time (TBPTT)" - + - " cannot be used with layer " + i + " of type " + l.getClass().getName() - + ": TBPTT is incompatible with this layer type (which is designed " + - "to process entire sequences at once, and does support the type of sequence segments that TPBTT uses).\n" - + - "This check can be disabled using validateTbpttConfig(false) but this is not recommended."); - } - } - } - - if (inputType == null && inputPreProcessors.get(0) == null) { - //User hasn't set the InputType. Sometimes we can infer it... - // For example, Dense/RNN layers, where preprocessor isn't set -> user is *probably* going to feed in - // standard feedforward or RNN data - //This isn't the most elegant implementation, but should avoid breaking backward compatibility here - //Can't infer InputType for CNN layers, however (don't know image dimensions/depth) - Layer firstLayer = confs.get(0).getLayer(); - if (firstLayer instanceof BaseRecurrentLayer) { - BaseRecurrentLayer brl = (BaseRecurrentLayer) firstLayer; - val nIn = brl.getNIn(); - if (nIn > 0) { - inputType = InputType.recurrent(nIn, brl.getRnnDataFormat()); - } - } else if (firstLayer instanceof DenseLayer || firstLayer instanceof EmbeddingLayer - || firstLayer instanceof OutputLayer) { - //Can't just use "instanceof FeedForwardLayer" here. ConvolutionLayer is also a FeedForwardLayer - FeedForwardLayer ffl = (FeedForwardLayer) firstLayer; - val nIn = ffl.getNIn(); - if (nIn > 0) { - inputType = InputType.feedForward(nIn); - } - } - } - - //Add preprocessors and set nIns, if InputType has been set - // Builder.inputType field can be set in 1 of 4 ways: - // 1. User calls setInputType directly - // 2. Via ConvolutionLayerSetup -> internally calls setInputType(InputType.convolutional(...)) - // 3. 
Via the above code: i.e., assume input is as expected by the RNN or dense layer -> sets the inputType field - if (inputType != null) { - InputType currentInputType = inputType; - for (int i = 0; i < confs.size(); i++) { - Layer l = confs.get(i).getLayer(); - if (inputPreProcessors.get(i) == null) { - //Don't override preprocessor setting, but set preprocessor if required... - InputPreProcessor inputPreProcessor = l.getPreProcessorForInputType(currentInputType); - if (inputPreProcessor != null) { - inputPreProcessors.put(i, inputPreProcessor); - } - } - - InputPreProcessor inputPreProcessor = inputPreProcessors.get(i); - if (inputPreProcessor != null) { - currentInputType = inputPreProcessor.getOutputType(currentInputType); - } - if (i > 0) { - Layer layer = confs.get(i - 1).getLayer(); - //convolution 1d is an edge case where it has rnn input type but the filters - //should be the output - if (layer instanceof Convolution1DLayer) { - if (l instanceof DenseLayer && inputType instanceof InputType.InputTypeRecurrent) { - FeedForwardLayer feedForwardLayer = (FeedForwardLayer) l; - if (inputType instanceof InputType.InputTypeRecurrent) { - InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType; - feedForwardLayer.setNIn(recurrent.getTimeSeriesLength()); - } - } else { - l.setNIn(currentInputType, - overrideNinUponBuild); //Don't override the nIn setting, if it's manually set by the user - } - } else { - l.setNIn(currentInputType, - overrideNinUponBuild); //Don't override the nIn setting, if it's manually set by the user - } - - } else { - l.setNIn(currentInputType, - overrideNinUponBuild); //Don't override the nIn setting, if it's manually set by the user - } - - currentInputType = l.getOutputType(i, currentInputType); - } - - } - - MultiLayerConfiguration conf = new MultiLayerConfiguration(); - conf.confs = this.confs; - conf.inputPreProcessors = inputPreProcessors; - conf.backpropType = backpropType; - conf.tbpttFwdLength = tbpttFwdLength; - conf.tbpttBackLength = tbpttBackLength; - conf.trainingWorkspaceMode = trainingWorkspaceMode; - conf.inferenceWorkspaceMode = inferenceWorkspaceMode; - conf.cacheMode = cacheMode; - conf.dataType = dataType; - - Nd4j.getRandom().setSeed(conf.getConf(0).getSeed()); - - //Validate output layer configuration - if (validateOutputConfig) { - //Validate output layer configurations... - for (NeuralNetConfiguration n : conf.getConfs()) { - Layer l = n.getLayer(); - OutputLayerUtil.validateOutputLayer(l.getLayerName(), - l); //No-op for non output/loss layers - } - } - - return conf; - - } - } -} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java new file mode 100644 index 000000000..8ff512612 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java @@ -0,0 +1,1021 @@ +/* + * ****************************************************************************** + * * + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. 
+ * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + */ + +package org.deeplearning4j.nn.conf; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NonNull; +import lombok.Setter; +import lombok.Singular; +import lombok.experimental.SuperBuilder; +import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.INeuralNetworkConfiguration; +import org.deeplearning4j.nn.api.OptimizationAlgorithm; +import org.deeplearning4j.nn.api.layers.LayerConstraint; +import org.deeplearning4j.nn.conf.distribution.Distribution; +import org.deeplearning4j.nn.conf.dropout.Dropout; +import org.deeplearning4j.nn.conf.dropout.IDropout; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.conf.serde.JsonMappers; +import org.deeplearning4j.nn.conf.stepfunctions.StepFunction; +import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; +import org.deeplearning4j.nn.weights.IWeightInit; +import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.WeightInitDistribution; +import org.deeplearning4j.nn.weights.WeightInitXavier; +import org.deeplearning4j.util.NetworkUtils; +import org.nd4j.common.base.Preconditions; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.activations.IActivation; +import org.nd4j.linalg.activations.impl.ActivationSigmoid; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.learning.config.IUpdater; +import org.nd4j.linalg.learning.config.Sgd; +import org.nd4j.linalg.learning.regularization.L1Regularization; +import org.nd4j.linalg.learning.regularization.L2Regularization; +import org.nd4j.linalg.learning.regularization.Regularization; +import org.nd4j.linalg.learning.regularization.WeightDecay; + +/** + * Deeplearning4j is a domain-specific language to configure deep neural networks, which are made of + * multiple layers. Everything starts with a NeuralNetConfiguration, which organizes those layers + * and their hyperparameters. Hyperparameters are variables that determine how a neural network + * learns. They include how many times to update the weights of the model, how to initialize those + * weights, which activation function to attach to the nodes, which optimization algorithm to use, + * and how fast the model should learn. This is what one configuration would look like: + *

+ *
+ * NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
+ * .weightInit(WeightInit.XAVIER) .activation(Activation.RELU)
+ * .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
+ * .updater(new Sgd(0.05)) //... other hyperparameters
+ * .backprop(true)
+ * .build();

+ * + * With Deeplearning4j, you add a layer + * by calling layer on the NeuralNetConfiguration.NeuralNetConfigurationBuilder(), specifying its place in the order of + * layers (the zero-indexed layer below is the input layer), the number of input and output nodes, + * nIn and nOut, as well as the type: DenseLayer.

+ *
+ * .layer(0, new DenseLayer.Builder().nIn(784).nOut(250)
+ * .build())
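Putting the two snippets above together, usage would look roughly like the following sketch. It is illustrative only: MultiLayerNetwork, init() and fit(...) are assumed from the surrounding DL4J API rather than taken from this patch, and trainIter is a hypothetical DataSetIterator supplied by the caller.

    // Hypothetical usage sketch based on the Javadoc above
    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
        .weightInit(WeightInit.XAVIER)
        .activation(Activation.RELU)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(new Sgd(0.05))
        .layer(0, new DenseLayer.Builder().nIn(784).nOut(250).build())
        .build();
    MultiLayerNetwork model = new MultiLayerNetwork(conf); // assumed wrapper class
    model.init();
    model.fit(trainIter); // trainIter: caller-supplied DataSetIterator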

+ * + * Once you've configured your net, you train the + * model with model.fit. + */ + +@Data +@Slf4j +@EqualsAndHashCode(exclude = {"iterationCount", "epochCount"}) +@JsonIgnoreProperties(ignoreUnknown = true) +//The inner builder, that we can then extend ... +@SuperBuilder //TODO fix access +public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetworkConfiguration { + + private static final int DEFAULT_TBPTT_LENGTH = 20; + + + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all weight parameters of all layers + */ + @lombok.Builder.Default + protected final List contrainWeights = new ArrayList<>(); + + + + + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all bias parameters of all layers + */ + @lombok.Builder.Default + protected final List biasConstraints = new ArrayList<>(); + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all parameters of all layers + */ + @lombok.Builder.Default + protected final List allParamContraints = new ArrayList<>(); + /** + * This is a basic concept, a neural network is made of layers, but also can use + * another neural network as a building block. When the configuration is initialized, those + * building blocks will be flattened into a single list of layers. + * Internal ordered list of layers and inner neural networks. If the object is a NeuralNetConfiguration, + * each configuration must contain at least one layer. + */ + @Getter @lombok.Builder.Default + protected final List innerConfigurations = new ArrayList<>(); + @Getter + @Setter + @NonNull + @lombok.Builder.Default + @Deprecated + protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.ENABLED; + @Getter + @Setter + @NonNull + @lombok.Builder.Default + @Deprecated + protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.ENABLED; + /** + * The type of backprop. Default setting is used for most networks (MLP, CNN etc), but optionally + * truncated BPTT can be used for training recurrent neural networks. If using TruncatedBPTT make + * sure you set both tBPTTForwardLength() and tBPTTBackwardLength() + */ + @Getter + @Setter + @NonNull + @lombok.Builder.Default + protected BackpropType backpropType = BackpropType.Standard; + @Getter + @lombok.Builder.Default + protected Map inputPreProcessors = new HashMap<>(); + /** + * When doing truncated BPTT: how many steps of forward pass should we do before doing (truncated) + * backprop?
Only applicable when doing backpropType(BackpropType.TruncatedBPTT)
Typically + * tBPTTForwardLength parameter is same as the tBPTTBackwardLength parameter, but may be larger + * than it in some circumstances (but never smaller)
Ideally your training data time series + * length should be divisible by this This is the k1 parameter on pg23 of http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf + * + * @param forwardLength Forward length > 0, >= backwardLength + */ + @Getter + @Setter + @lombok.Builder.Default + protected int tbpttFwdLength = 20; + /** + * When doing truncated BPTT: how many steps of backward should we do?
Only applicable when + * doing backpropType(BackpropType.TruncatedBPTT)
This is the k2 parameter on pg23 of http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf + * + * @param backwardLength <= forwardLength + */ + @Getter + @Setter + @lombok.Builder.Default + protected int tbpttBackLength = 20; + //Counter for the number of parameter updates so far + // This is important for learning rate schedules, for example, and is stored here to ensure it is persisted + // for Spark and model serialization + @Getter + @Setter + @lombok.Builder.Default + protected int iterationCount = 0; + //Counter for the number of epochs completed so far. Used for per-epoch schedules + @Getter + @Setter + @lombok.Builder.Default + protected int epochCount = 0; + @lombok.Builder.Default + protected double dampingFactor = 100; + //gradient keys used for ensuring order when getting and setting the gradient + //@lombok.Builder.Default + //protected List variables = new ArrayList<>(); + @Getter + @Setter + @lombok.Builder.Default + private boolean miniBatch = false; + /** + * A seed for this network, will be random if not specified. + */ + @Getter + @Setter + @lombok.Builder.Default + private long seed = new Random().nextLong(); + /** + * The default {@link CacheMode} for this configuration. Will be set to "NONE" if not specified + * otherwise. This method defines how/if preOutput cache is handled: NONE: cache disabled (default + * value) HOST: Host memory will be used DEVICE: GPU memory will be used (on CPU backends effect + * will be the same as for HOST) + *

+ * Valid values are CacheMode.NONE, CacheMode.HOST or CacheMode.DEVICE
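As a concrete illustration of the cache modes listed above, a minimal sketch follows; it assumes that NeuralNetConfiguration (named in the class Javadoc) is the concrete builder entry point, since this class itself is abstract.

    // Sketch: selecting the preOutput cache mode via the generated @SuperBuilder
    NeuralNetConfiguration cfg = NeuralNetConfiguration.builder()
        .cacheMode(CacheMode.HOST) // NONE (default), HOST or DEVICE
        .build();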
+ * + * @param cacheMode + */ + @NonNull + @Getter + @Setter + @lombok.Builder.Default + private CacheMode cacheMode = CacheMode.NONE; + + /** + * The name for this configuration. Defaults to "Anonymous INeuralNetworkConfiguration" if it is + * not specified. + */ + @lombok.Builder.Default + @Getter + private String name = "Anonymous INeuralNetworkConfiguration"; + /** + * The {@link InputType} of the data for this network configuration + */ + @Getter + @Setter + private InputType inputType; + /** + * Set the DataType for the network parameters and activations for all layers in the network. + * Default: Float + * + * @param dataType Datatype to use for parameters and activations + */ + @Getter + @Setter + @lombok.Builder.Default + @NonNull + private DataType dataType = DataType.FLOAT; + /** + * Whether to override the nIn configuration forcibly upon construction. Default value is true. + * + * @return builder pattern + */ + @Getter + @Setter + @lombok.Builder.Default + private boolean overrideNinUponBuild = true; + /** + * Enabled by default. If enabled, the output layer configuration will be validated, to throw an + * exception on likely invalid outputs - such as softmax + nOut=1, or LossMCXENT + Tanh.
If + * disabled (false) no output layer validation will be performed.
Disabling this validation is + * not recommended, as the configurations that fail validation usually will not be able to learn + * correctly. However, the option to disable this validation is provided for advanced users when + * creating non-standard architectures. + * + * @param validate If true: validate output layer configuration. False: don't validate + */ + @Getter + @Setter + @lombok.Builder.Default + private boolean validateOutputLayerConfig = true; + /** + * Enabled by default. If enabled, an exception will be thrown when using the (invalid) combination + * of truncated backpropagation through time (TBPTT) with either a GlobalPoolingLayer or + * LastTimeStepLayer.

It is possible to disable this validation to allow what is almost + * certainly an invalid configuration to be used, however this is not recommended. + * + * @param validate Whether TBPTT validation should be performed + */ + @Getter + @Setter + @lombok.Builder.Default + private boolean validateTbpttConfig = true; + /** + * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam} or + * {@link org.nd4j.linalg.learning.config.Nesterovs}
Note: values set by this method will be + * applied to all applicable layers in the network, unless a different value is explicitly set on + * a given layer. In other words: values set via this method are used as the default value, and + * can be overridden on a per-layer basis. + * + * @param updater Updater to use + */ + @Getter + @Setter + private IUpdater updater; + /** + * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping + * etc. See {@link GradientNormalization} for details
Note: values set by this method will be + * applied to all applicable layers in the network, unless a different value is explicitly set on + * a given layer. In other words: values set via this method are used as the default value, and + * can be overridden on a per-layer basis. + * + * @param gradientNormalization Type of normalization to use. Defaults to None. + * @see GradientNormalization + */ + @Getter + @Setter + @NonNull + @lombok.Builder.Default + private GradientNormalization gradientNormalization = GradientNormalization.None; + /** + * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, + * GradientNormalization.ClipL2PerParamType, and + * GradientNormalization.ClipElementWiseAbsoluteValue
Not used otherwise.
L2 threshold for + * first two types of clipping, or absolute value threshold for last type of clipping.
Note: + * values set by this method will be applied to all applicable layers in the network, unless a + * different value is explicitly set on a given layer. In other words: values set via this method + * are used as the default value, and can be overridden on a per-layer basis. + */ + @Getter + @Setter + private double gradientNormalizationThreshold; + /** + * Activation function / neuron non-linearity
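For example, element-wise gradient clipping could be configured roughly as in the sketch below; the builder entry point (NeuralNetConfiguration) and the threshold value 1.0 are assumptions, not taken from this patch.

    // Sketch: clip each gradient element to [-1.0, 1.0]; the threshold is ignored by non-clipping modes
    NeuralNetConfiguration.builder()
        .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
        .gradientNormalizationThreshold(1.0)
        .build();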
Note: values set by this method will be applied + * to all applicable layers in the network, unless a different value is explicitly set on a given + * layer. In other words: values set via this method are used as the default value, and can be + * overridden on a per-layer basis. + */ + @Getter + @Setter + private IActivation activation; + //whether to constrain the gradient to unit norm or not + @Getter + @Setter + private StepFunction stepFunction; + @Getter + @Setter + @lombok.Builder.Default + private OptimizationAlgorithm optimizationAlgo = OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT; + @Getter + @Setter + @lombok.Builder.Default + private int maxNumLineSearchIterations = 5; + /** + * Set the regularization for the parameters (excluding biases) - for example {@link WeightDecay}
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis.
+ * + * @param regularization Regularization to apply for the network parameters/weights (excluding biases) + */ + @Getter + @lombok.Builder.Default + private List regularization = new ArrayList<>(); + /** + * Set the regularization for the biases only - for example {@link WeightDecay}
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis.
+ * + * @param regularizationBias Regularization to apply for the network biases only + */ + @Getter + @lombok.Builder.Default + private List regularizationBias = new ArrayList<>(); + @Getter + @Setter + @lombok.Builder.Default + private IUpdater iUpdater = new Sgd(); + /** + * Gradient updater configuration, for the biases only. If not set, biases will use the updater as + * set by {@link #setIUpdater(IUpdater)}
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param updater Updater to use for bias parameters + */ + @Getter + @Setter + @lombok.Builder.Default + private IUpdater biasUpdater = null; + @Getter + @Setter + @lombok.Builder.Default + private IActivation activationFn = new ActivationSigmoid(); + /** + * Weight initialization scheme to use, for initial weight values Note: values set by this method + * will be applied to all applicable layers in the network, unless a different value is explicitly + * set on a given layer. In other words: values set via this method are used as the default value, + * and can be overridden on a per-layer basis. + */ + @Getter + @Setter + @lombok.Builder.Default + private IWeightInit weightInitFn = new WeightInitXavier(); + /** + * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. + * See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE
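A sketch of setting the network-wide defaults declared above; Adam and Sgd are existing nd4j updaters, while the builder entry point and the learning-rate values are assumptions.

    // Sketch: defaults applied to every layer unless a layer overrides them
    NeuralNetConfiguration.builder()
        .iUpdater(new Adam(1e-3))       // updater for weights
        .biasUpdater(new Sgd(1e-2))     // separate updater for biases
        .weightInitFn(new WeightInitXavier())
        .build();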
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * @param convolutionMode Convolution mode to use + */ + @Getter + @Setter + @lombok.Builder.Default + private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + /** + * Sets the cuDNN algo mode for convolutional layers, which impacts performance and memory usage of cuDNN. + * See {@link ConvolutionLayer.AlgoMode} for details. Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. + *
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * @param cudnnAlgoMode cuDNN algo mode to use + */ + @Getter + @Setter + @lombok.Builder.Default + private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST; + @Getter + @Setter + @lombok.Builder.Default + private boolean minimize = true; + /** + * Set the dropout for all layers in this network
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * * Dropout probability. This is the probability of retaining each input activation value for a layer. + * * dropOut(x) will keep an input activation with probability x, and set to 0 with probability 1-x.
+ * * dropOut(0.0) is a special value / special case - when set to 0.0, dropout is disabled (not applied). Note + * * that a dropout value of 1.0 is functionally equivalent to no dropout: i.e., 100% probability of retaining + * * each input activation.

+ * *

+ * * Note 1: Dropout is applied at training time only - and is automatically not applied at test time + * * (for evaluation, etc)
+ * * Note 2: This sets the probability per-layer. Care should be taken when setting lower values for + * * complex networks (too much information may be lost with aggressive (very low) dropout values).
+ * * Note 3: Frequently, dropout is not applied to (or, has higher retain probability for) input (first layer) + * * layers. Dropout is also often not applied to output layers. This needs to be handled MANUALLY by the user + * * - set .dropout(0) on those layers when using global dropout setting.
+ * * Note 4: Implementation detail (most users can ignore): DL4J uses inverted dropout, as described here: + * * http://cs231n.github.io/neural-networks-2/ + * *

+ * *
+ * * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * * value, and can be overridden on a per-layer basis. + * * + * * @param inputRetainProbability Dropout probability (probability of retaining each input activation value for a layer) + * * @see #dropOut(IDropout) + * + * + * @param dropout Dropout, such as {@link Dropout}, {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout}, + * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc + * @return + */ + @Getter + @Setter + private IDropout idropOut; + /** + * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and + * {@link org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) for the layers in this network.
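For illustration, the dropout and weight-noise defaults described above might be set as in this sketch; the builder entry point and the retain probabilities are assumptions.

    // Sketch: retain 80% of input activations per layer by default; DropConnect keeps 90% of weights
    NeuralNetConfiguration.builder()
        .idropOut(new Dropout(0.8))
        .weightNoise(new DropConnect(0.9))
        .build();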
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param weightNoise Weight noise instance to use + */ + @Getter + @Setter + private IWeightNoise weightNoise; + @Getter + @Setter + @lombok.Builder.Default + private double biasInit = 0.0; + @Getter + @Setter + @lombok.Builder.Default + private double gainInit = 1.0; + + /** + * Handle {@link WeightInit} and {@link Distribution} from legacy configs in Json format. Copied + * from handling of {@link Activation} above. + * + * @return True if all is well and layer iteration shall continue. False else-wise. + */ + private static boolean handleLegacyWeightInitFromJson(String json, LayerConfiguration l, + ObjectMapper mapper, + JsonNode confs, int layerCount) { + if ((l instanceof BaseLayer) && ((BaseLayer) l).getWeightInitFn() == null) { + try { + JsonNode jsonNode = mapper.readTree(json); + if (confs == null) { + confs = jsonNode.get("confs"); + } + if (confs instanceof ArrayNode) { + ArrayNode layerConfs = (ArrayNode) confs; + JsonNode outputLayerNNCNode = layerConfs.get(layerCount); + if (outputLayerNNCNode == null) { + return false; //Should never happen... + } + JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); + + if (layerWrapperNode == null || layerWrapperNode.size() != 1) { + return true; + } + + JsonNode layerNode = layerWrapperNode.elements().next(); + JsonNode weightInit = layerNode.get( + "weightInit"); //Should only have 1 element: "dense", "output", etc + JsonNode distribution = layerNode.get("dist"); + + Distribution dist = null; + if (distribution != null) { + dist = mapper.treeToValue(distribution, Distribution.class); + } + + if (weightInit != null) { + final IWeightInit wi = WeightInit.valueOf(weightInit.asText()) + .getWeightInitFunction(dist); + ((BaseLayer) l).setWeightInitFn(wi); + } + } + + } catch (IOException e) { + log.warn( + "ILayer with null WeightInit detected: " + l.getLayerName() + ", could not parse JSON", + e); + } + } + return true; + + } + + /** + * Object mapper for serialization of configurations + * + * @return + */ + public static ObjectMapper mapperYaml() { + return JsonMappers.getMapperYaml(); + } + + /** + * Object mapper for serialization of configurations + * + * @return + */ + public static ObjectMapper mapper() { + return JsonMappers.getMapper(); + } + + public static NeuralNetBaseBuilderConfiguration fromYaml(String input) { + throw new RuntimeException("Needs fixing - not supported."); //TODO + } + + + /** + * @return JSON representation of NN configuration + */ + public String toYaml() { + ObjectMapper mapper = NeuralNetBaseBuilderConfiguration.mapperYaml(); + synchronized (mapper) { + try { + return mapper.writeValueAsString(this); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + throw new RuntimeException(e); + } + } + } + + /** + * @return JSON representation of NN configuration + */ + public String toJson() { + ObjectMapper mapper = NeuralNetBaseBuilderConfiguration.mapper(); + synchronized (mapper) { + //JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields occasionally + //when writeValueAsString is used by multiple threads. This results in invalid JSON. 
See issue #3243 + try { + return mapper.writeValueAsString(this); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + throw new RuntimeException(e); + } + } + } + + public abstract static class NeuralNetBaseBuilderConfigurationBuilder + > { + + List innerConfigurations$value = new ArrayList<>(); //initialize with an empty list + + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all weight parameters of all layers + */ + public B constrainWeights(LayerConstraint... constraints) { + contrainWeights$value = Arrays.asList(constraints); + contrainWeights$set = true; + return (B) this; + } + + /** + * For the (perhaps partially constructed) network configuration, return a list of activation sizes for each + * layer in the network.
Note: To use this method, the network input type must have been set using {@link #setInputType(InputType)} first + * @return A list of activation types for the network, indexed by layer number + */ + public List getLayerActivationTypes(){ + Preconditions.checkState(inputType != null, "Can only calculate activation types if input type has" + + " been set. Use setInputType(InputType)"); + + + throw new RuntimeException("Error calculating layer activation types: error instantiating MultiLayerConfiguration"); + + } + + + /** + * Set constraints to be applied to all layers. Default: no constraints.

+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all parameters of all layers + */ + public B constrainAllParameters(LayerConstraint... constraints){ + allParamContraints$value = Arrays.asList(constraints); + allParamContraints$set = true; + return (B) this; + } + + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated.
Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all bias parameters of all layers + */ + public B constrainBias(LayerConstraint... constraints) { + biasConstraints$value = Arrays.asList(constraints); + biasConstraints$set = true; + return (B) this; + } + + /** + * Specify the processors. These are used at each layer for doing things like normalization and + * shaping of input. + * + * @param processor what to use to preProcess the data. + * @return builder pattern + */ + public B inputPreProcessor(Integer layer, + InputPreProcessor processor) { + inputPreProcessors$value.put(layer, processor); + inputPreProcessors$set = true; + return (B) this; + } + + + /** + * Set layer at index + * + * @param index where to insert + * @param layer the layer + * @return builder + */ + public B layer(Integer index, @NonNull LayerConfiguration layer) { + innerConfigurations$value.add(index, layer); + innerConfigurations$set = true; + return (B) this; + } + + /** + * Add a layer + * + * @param layer the layer + * @return builder + */ + public B layer(@NonNull LayerConfiguration layer) { + innerConfigurations$value.add(layer); + innerConfigurations$set = true; + return (B) this; + } + + //TODO this is a dirty workaround + public boolean isOverrideNinUponBuild() { + return overrideNinUponBuild$value; //read the Lombok-generated default field; the previous body called isOverrideNinUponBuild() and recursed forever + } + + /** + * Specify additional layer configurations + */ + @Deprecated + public B layersFromArray(@NonNull LayerConfiguration[] arrLayers) { + innerConfigurations$value.addAll(List.of(arrLayers)); + innerConfigurations$set = true; + return (B) this; + } + + /** + * Specify additional layer configurations + */ + @Deprecated + public B layersFromList(@NonNull List listLayers) { + innerConfigurations$value.addAll(listLayers); + innerConfigurations$set = true; + return (B) this; + } + + + /** + * L1 regularization coefficient for the weights (excluding biases).

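To illustrate the constraint setters above, a minimal sketch; MaxNormConstraint is an existing DL4J constraint, while the builder entry point and the norm value are assumptions.

    // Sketch: cap the L2 norm of each weight vector at 2.0 along dimension 0, checked after every parameter update
    NeuralNetConfiguration.builder()
        .constrainWeights(new MaxNormConstraint(2.0, 0))
        .build();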
Note: values set by + * this method will be applied to all applicable layers in the network, unless a different value + * is explicitly set on a given layer. In other words: values set via this method are used as + * the default value, and can be overridden on a per-layer basis. + */ + public B l1(double l1) { + //Check if existing L1 exists; if so, replace it + NetworkUtils.removeInstances(regularization$value, L1Regularization.class); + if (l1 > 0.0) { + regularization$value.add(new L1Regularization(l1)); + } + regularization$set = true; + return (B) this; + } + + /** + * L2 regularization coefficient for the weights (excluding biases).
+ * Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double)} should be + * preferred to + * L2 regularization. See {@link WeightDecay} javadoc for further details.
Note: values set + * by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used + * as the default value, and can be overridden on a per-layer basis.
Note: L2 regularization + * and weight decay usually should not be used together; if any weight decay (or L2) has been + * added for the biases, these will be removed first. + * + * @see #weightDecay(double, boolean) + */ + public B l2(double l2) { + //Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make sense to use both + NetworkUtils.removeInstances(regularization$value, L2Regularization.class); + if (l2 > 0.0) { + NetworkUtils.removeInstancesWithWarning(regularization$value, WeightDecay.class, + "WeightDecay regularization removed: incompatible with added L2 regularization"); + regularization$value.add(new L2Regularization(l2)); + } + regularization$set = true; + return (B) this; + } + + /** + * L1 regularization coefficient for the bias.
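The replacement behaviour implemented by l2(...) and weightDecay(...) above can be seen in a short sketch; the builder entry point and the coefficients are assumptions.

    // Sketch: the later call wins; weightDecay(...) removes the L2 term (with a warning) before adding WeightDecay
    var b = NeuralNetConfiguration.builder();
    b.l2(1e-4);          // regularization list now holds L2Regularization(1e-4)
    b.weightDecay(5e-5); // L2Regularization removed, WeightDecay(5e-5, true) added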
Note: values set by this method will be + * applied to all applicable layers in the network, unless a different value is explicitly set + * on a given layer. In other words: values set via this method are used as the default value, + * and can be overridden on a per-layer basis. + */ + public B l1Bias(double l1Bias) { + NetworkUtils.removeInstances(regularizationBias$value, L1Regularization.class); + if (l1Bias > 0.0) { + regularizationBias$value.add(new L1Regularization(l1Bias)); + } + regularizationBias$set = true; + return (B) this; + } + + /** + * L2 regularization coefficient for the bias.
+ * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double, boolean)} + * should be preferred to + * L2 regularization. See {@link WeightDecay} javadoc for further details.
Note: values set + * by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used + * as the default value, and can be overridden on a per-layer basis.
Note: L2 regularization + * and weight decay usually should not be used together; if any weight decay (or L2) has been + * added for the biases, these will be removed first. + * + * @see #weightDecayBias(double, boolean) + */ + public B l2Bias(double l2Bias) { + NetworkUtils.removeInstances(regularizationBias$value, L2Regularization.class); + if (l2Bias > 0.0) { + NetworkUtils.removeInstancesWithWarning(regularizationBias$value, WeightDecay.class, + "L2 bias regularization removed: incompatible with added WeightDecay regularization"); + regularizationBias$value.add(new L2Regularization(l2Bias)); + } + return (B) this; + } + + /** + * Add weight decay regularization for the network parameters (excluding biases).
This + * applies weight decay with multiplying the learning rate - see {@link WeightDecay} for + * more details.
Note: values set by this method will be applied to all applicable layers in + * the network, unless a different value is explicitly set on a given layer. In other words: + * values set via this method are used as the default value, and can be overridden on a + * per-layer basis.
+ * + * @param coefficient Weight decay regularization coefficient + * @see #weightDecay(double, boolean) + */ + public B weightDecay(double coefficient) { + return weightDecay(coefficient, true); + } + + /** + * Add weight decay regularization for the network parameters (excluding biases). See + * {@link WeightDecay} for more details.
Note: values set by this method will be applied to + * all applicable layers in the network, unless a different value is explicitly set on a given + * layer. In other words: values set via this method are used as the default value, and can be + * overridden on a per-layer basis.
+ * + * @param coefficient Weight decay regularization coefficient + * @param applyLR Whether the learning rate should be multiplied in when performing weight + * decay updates. See {@link WeightDecay} for more details. + * @see #weightDecay(double) + */ + public B weightDecay(double coefficient, boolean applyLR) { + //Check if weight decay already exists; if so, replace it. Also remove L2 - it doesn't make sense to use both + NetworkUtils.removeInstances(regularization$value, WeightDecay.class); + if (coefficient > 0.0) { + NetworkUtils.removeInstancesWithWarning(regularization$value, L2Regularization.class, + "L2 regularization removed: incompatible with added WeightDecay regularization"); + regularization$value.add(new WeightDecay(coefficient, applyLR)); + } + regularization$set = true; + return (B) this; + } + + /** + * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. This + * applies weight decay with multiplying the learning rate.
Note: values set by this + * method will be applied to all applicable layers in the network, unless a different value is + * explicitly set on a given layer. In other words: values set via this method are used as the + * default value, and can be overridden on a per-layer basis.
+ * + * @param coefficient Weight decay regularization coefficient + * @see #weightDecayBias(double, boolean) + */ + public B weightDecayBias(double coefficient) { + return weightDecayBias(coefficient, true); + } + + /** + * Weight decay for the biases only - see {@link #weightDecay(double)} for more details
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis.
+ * + * @param coefficient Weight decay regularization coefficient + * @param applyLR Whether the learning rate should be multiplied in when performing weight decay updates. See {@link WeightDecay} for more details. + */ + public B weightDecayBias(double coefficient, boolean applyLR) { + //Check if weight decay already exists; if so, replace it. Also remove L2 - it doesn't make sense to use both + NetworkUtils.removeInstances(regularizationBias$value, WeightDecay.class); + if (coefficient > 0) { + NetworkUtils.removeInstancesWithWarning(regularizationBias$value, L2Regularization.class, + "L2 bias regularization removed: incompatible with added WeightDecay regularization"); + regularizationBias$value.add(new WeightDecay(coefficient, applyLR)); + } + regularizationBias$set = true; + return (B) this; + }
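To make the interplay between these builder methods concrete, here is a minimal usage sketch (assuming the Lombok-generated builder is obtained via NeuralNetConfiguration.builder(), as shown in the class Javadoc of this file); an L2 term and a weight decay term for the same parameter set do not stack - the later call replaces the earlier one and logs the warning shown above:

NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
    .l2(1e-4)                 // adds an L2Regularization term for the weights
    .weightDecay(5e-4, true)  // removes that L2 term again (with a warning) and adds WeightDecay instead
    .l1Bias(1e-5)             // bias regularization is tracked in a separate list
    .build();

+ + /** + * Activation function / neuron non-linearity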
Note: values set by this method will be + * applied to all applicable layers in the network, unless a different value is explicitly set + * on a given layer. In other words: values set via this method are used as the default value, + * and can be overridden on a per-layer basis. + */ + @Deprecated + public B activation(@NonNull Activation activation) { + return (B) activationFn(activation.getActivationFunction()); + } + + + + @Deprecated + public B weightInit(@NonNull WeightInit wi) { + return (B) weightInitFn(wi.getWeightInitFunction()); + } + + /** + * legacy code, does nothing + * @return + */ + @Deprecated + public B list() { + return (B) this; + } + + + /** + * Set weight initialization scheme to random sampling via the specified distribution. + * Equivalent to: {@code .weightInit(new WeightInitDistribution(distribution))} Note: values set + * by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used + * as the default value, and can be overridden on a per-layer basis. + * + * @param distribution Distribution to use for weight initialization + */ + public B weightInit(@NonNull Distribution distribution) { + return (B) weightInitFn(new WeightInitDistribution(distribution)); + } + + /** + * Same as {@link #weightInit(Distribution)}. + * @param distribution + * @return + */ + public B dist(@NonNull Distribution distribution) { + return (B) weightInit(distribution); + } + + public B dropOut(@NonNull IDropout dropout) { + return (B) idropOut(dropout); + } + + /** + * Creates a new {@link Dropout} and sets the dropout in the builder for this configuration + * @param dropout activationRetainProbability + * @return builder + */ + public B dropOut( double dropout) { + return (B) idropOut( new Dropout(dropout)); + } + + /** + * Add multiple inner neural net configurations at once + * @param confs list of configurations + * @return builder + */ + @Deprecated + public B confs(@NonNull List confs) { + innerConfigurations$value.addAll(confs); + innerConfigurations$set=true; + return (B) this; + } + } + + @Override + public NeuralNetBaseBuilderConfiguration clone() { + NeuralNetBaseBuilderConfiguration clone; + try { + clone = (NeuralNetBaseBuilderConfiguration) super.clone(); + } catch(CloneNotSupportedException ex) { + throw new RuntimeException(ex); + } + if (clone.stepFunction != null) { + clone.stepFunction = clone.stepFunction.clone(); + } + /** + if (clone.variables != null) { + clone.variables = new ArrayList<>(clone.variables); + } + **/ + + clone.getInnerConfigurations().addAll(innerConfigurations); + + if (clone.getInputPreProcessors() != null) { + Map map = new HashMap<>(); + for (Map.Entry entry : clone.getInputPreProcessors().entrySet()) { + map.put(entry.getKey(), entry.getValue().clone()); + } + clone.getInputPreProcessors().clear(); + clone.getInputPreProcessors().putAll(map); + } + + clone.setInferenceWorkspaceMode(this.inferenceWorkspaceMode); + clone.setTrainingWorkspaceMode(this.trainingWorkspaceMode); + clone.setCacheMode(this.cacheMode); + clone.setValidateOutputLayerConfig(this.validateOutputLayerConfig); + clone.setDataType(this.dataType); + + return clone; + + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index f44a8f3ab..5c221222c 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -20,33 +20,54 @@ package org.deeplearning4j.nn.conf; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.exc.InvalidTypeIdException; +import com.fasterxml.jackson.databind.node.ArrayNode; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; +import lombok.Getter; import lombok.NonNull; +import lombok.Setter; +import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import lombok.extern.slf4j.Slf4j; -import net.brutex.ai.dnn.api.INeuralNetworkConfiguration; +import lombok.val; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.dropout.IDropout; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.*; -import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer; -import org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop; -import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; -import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; -import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; +import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; +import org.deeplearning4j.nn.conf.layers.BaseRecurrentLayer; +import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.EmbeddingLayer; +import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; +import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.conf.memory.MemoryReport; +import org.deeplearning4j.nn.conf.memory.NetworkMemoryReport; import org.deeplearning4j.nn.conf.serde.JsonMappers; import org.deeplearning4j.nn.conf.stepfunctions.StepFunction; import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.nn.weights.WeightInitDistribution; -import org.deeplearning4j.nn.weights.WeightInitXavier; -import org.deeplearning4j.util.NetworkUtils; -import org.nd4j.common.base.Preconditions; +import org.deeplearning4j.util.OutputLayerUtil; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationSigmoid; @@ -54,1168 +75,1074 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.IUpdater; import 
org.nd4j.linalg.learning.config.Sgd; -import org.nd4j.linalg.learning.regularization.L1Regularization; -import org.nd4j.linalg.learning.regularization.L2Regularization; import org.nd4j.linalg.learning.regularization.Regularization; import org.nd4j.linalg.learning.regularization.WeightDecay; -import com.fasterxml.jackson.databind.ObjectMapper; - -import java.io.IOException; -import java.io.Serializable; -import java.util.*; +import org.nd4j.linalg.lossfunctions.LossFunctions; +import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT; +import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; +import org.nd4j.linalg.lossfunctions.impl.LossMSE; +import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; +/** + * Deeplearning4j is a domain-specific language to configure deep neural networks, which are made of + * multiple layers. Everything starts with a NeuralNetConfiguration, which organizes those layers + * and their hyperparameters. Hyperparameters are variables that determine how a neural network + * learns. They include how many times to update the weights of the model, how to initialize those + * weights, which activation function to attach to the nodes, which optimization algorithm to use, + * and how fast the model should learn. This is what one configuration would look like: + *

+ * + * NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
+ * .weightInit(WeightInit.XAVIER) + * .activation(Activation.RELU)
+ * .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
+ * .updater(new Sgd(0.05)) //... other hyperparameters
+ * .backprop(true)
+ * .build();

+ * + * With Deeplearning4j, you add a layer + * by calling layer on the NeuralNetConfiguration.NeuralNetConfigurationBuilder(), specifying its place in the order of + * layers (the zero-indexed layer below is the input layer), the number of input and output nodes, + * nIn and nOut, as well as the type: DenseLayer.

+ * + * .layer(0, new DenseLayer.Builder().nIn(784).nOut(250)
+ * .build())
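As a rough end-to-end sketch of the flow described in this Javadoc (builder, layer and network classes are assumed to be wired together as the surrounding text implies; exact signatures may differ):

NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
    .updater(new Sgd(0.05))
    .activation(Activation.RELU)
    .weightInit(WeightInit.XAVIER)
    .layer(new DenseLayer.Builder().nIn(784).nOut(250).build())
    .layer(new OutputLayer.Builder().nIn(250).nOut(10).activation(Activation.SOFTMAX).build())
    .build();

MultiLayerNetwork model = new MultiLayerNetwork(conf); // assumption: MultiLayerNetwork accepts this configuration type
model.init();
model.fit(trainingData); // trainingData: a DataSetIterator supplied by the caller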

+ * + * Once you've configured your net, you train the + * model with model.fit. + */ @Data -@NoArgsConstructor @Slf4j @EqualsAndHashCode(exclude = {"iterationCount", "epochCount"}) -public class NeuralNetConfiguration implements Serializable, Cloneable, - INeuralNetworkConfiguration { +@Jacksonized +@JsonIgnoreProperties(ignoreUnknown = true) +//The inner builder, that we can then extend ... +@SuperBuilder //TODO fix access +public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { + private static final int DEFAULT_TBPTT_LENGTH = 20; - protected Layer layer; - //batch size: primarily used for conv nets. Will be reinforced if set. - protected boolean miniBatch = true; - //number of line search iterations - protected int maxNumLineSearchIterations; - protected long seed; - protected OptimizationAlgorithm optimizationAlgo; - //gradient keys used for ensuring order when getting and setting the gradient - protected List variables = new ArrayList<>(); - //whether to constrain the gradient to unit norm or not - protected StepFunction stepFunction; - //minimize or maximize objective - protected boolean minimize = true; + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all bias parameters of all layers + */ + @lombok.Builder.Default + protected final List biasConstraints = new ArrayList<>(); + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all parameters of all layers + */ + @lombok.Builder.Default + protected final List allParamContraints = new ArrayList<>(); - // this field defines preOutput cache - protected CacheMode cacheMode; + @Getter + @Setter + @NonNull + @lombok.Builder.Default + @Deprecated + protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.ENABLED; + @Getter + @Setter + @NonNull + @lombok.Builder.Default + @Deprecated + protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.ENABLED; + /** + * The type of backprop. Default setting is used for most networks (MLP, CNN etc), but optionally + * truncated BPTT can be used for training recurrent neural networks. If using TruncatedBPTT make + * sure you set both tBPTTForwardLength() and tBPTTBackwardLength() + */ + @Getter + @Setter + @NonNull + @lombok.Builder.Default + protected BackpropType backpropType = BackpropType.Standard; + /** + * When doing truncated BPTT: how many steps of forward pass should we do before doing (truncated) + * backprop?
Only applicable when doing backpropType(BackpropType.TruncatedBPTT)
Typically + * the tBPTTForwardLength parameter is the same as the tBPTTBackwardLength parameter, but may be larger + * than it in some circumstances (but never smaller).
Ideally your training data time series + * length should be divisible by this. This is the k1 parameter on pg23 of http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf + * + * @param forwardLength Forward length > 0, >= backwardLength + */ + @Getter + @Setter + @lombok.Builder.Default + protected int tbpttFwdLength = 20;
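As a small sketch of how these two lengths are typically configured together (assuming the Lombok-generated builder exposes the fields under the same names):

NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
    .backpropType(BackpropType.TruncatedBPTT)
    .tbpttFwdLength(20)   // k1: forward steps per truncated segment
    .tbpttBackLength(20)  // k2: backward steps, must not exceed the forward length
    .build();

+ /** + * When doing truncated BPTT: how many steps of backward should we do?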
Only applicable when + * doing backpropType(BackpropType.TruncatedBPTT)
This is the k2 parameter on pg23 of http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf + * + * @param backwardLength <= forwardLength + */ + @Getter + @Setter + @lombok.Builder.Default + protected int tbpttBackLength = 20; + /** + * Creates and returns a copy of this object. + * + * @return a clone of this instance. + * @throws CloneNotSupportedException if the object's class does not support the {@code Cloneable} + * interface. Subclasses that override the {@code clone} method can also throw this exception to + * indicate that an instance cannot be cloned. + * @see Cloneable + */ - protected DataType dataType = DataType.FLOAT; //Default to float for deserialization of legacy format nets + //Nd4j.getRandom().setSeed(getConf(0).getSeed()); //TODO + //Counter for the number of parameter updates so far + // This is important for learning rate schedules, for example, and is stored here to ensure it is persisted + // for Spark and model serialization + @Getter + @Setter + @lombok.Builder.Default + protected int iterationCount = 0; + //Counter for the number of epochs completed so far. Used for per-epoch schedules + @Getter + @Setter + @lombok.Builder.Default + protected int epochCount = 0; + @lombok.Builder.Default + protected double dampingFactor = 100; + //gradient keys used for ensuring order when getting and setting the gradient + @lombok.Builder.Default + protected List netWideVariables = new ArrayList<>(); + @Getter + @Setter + @lombok.Builder.Default + private boolean miniBatch = false; + /** + * A seed for this network, will be random if not specified. - //Counter for the number of parameter updates so far for this layer. - //Note that this is only used for pretrain layers (AE, VAE) - MultiLayerConfiguration and ComputationGraphConfiguration - //contain counters for standard backprop training. - // This is important for learning rate schedules, for example, and is stored here to ensure it is persisted - // for Spark and model serialization - protected int iterationCount = 0; + @Getter + @Setter + @lombok.Builder.Default + private long seed = new Random().nextLong(); */ + /** + * The default {@link CacheMode} for this configuration. Will be set to "NONE" if not specified + * otherwise. This method defines how/if preOutput cache is handled: NONE: cache disabled (default + * value) HOST: Host memory will be used DEVICE: GPU memory will be used (on CPU backends effect + * will be the same as for HOST) + *

+ * Valid values are
CacheMode.NONE,
CacheMode.HOST or
CacheMode.DEVICE
+ * + * @param cacheMode + */ + @NonNull + @Getter + @Setter + @lombok.Builder.Default + private CacheMode cacheMode = CacheMode.NONE; + /** + * The list of layer configurations in this configuration. They will be indexed automatically as + * the layers get added starting with index 0. + */ - //Counter for the number of epochs completed so far. Used for per-epoch schedules - protected int epochCount = 0; + @lombok.Builder.Default + @Getter + private String name = "Anonymous INeuralNetworkConfiguration"; + /** + * The {@link InputType} of the data for this network configuration + */ + @Getter + @Setter + private InputType inputType; + /** + * Set the DataType for the network parameters and activations for all layers in the network. + * Default: Float + * + * @param dataType Datatype to use for parameters and activations + */ + @Getter + @Setter + @lombok.Builder.Default + @NonNull + private DataType dataType = DataType.FLOAT; + /** + * Whether to override the nIn configuration forcibly upon construction. Default value is true. + * + * @return builder pattern + */ + @Getter + @Setter + @lombok.Builder.Default + private boolean overrideNinUponBuild = true; + /** + * Enabled by default. If enabled, the output layer configuration will be validated, to throw an + * exception on likely invalid outputs - such as softmax + nOut=1, or LossMCXENT + Tanh.
If + * disabled (false) no output layer validation will be performed.
Disabling this validation is + * not recommended, as the configurations that fail validation usually will not be able to learn + * correctly. However, the option to disable this validation is provided for advanced users when + * creating non-standard architectures. + * + * @param validate If true: validate output layer configuration. False: don't validate + */ + @Getter + @Setter + @lombok.Builder.Default + private boolean validateOutputLayerConfig = true; + /** + * Enabled by default. If enabled, an exception will be thrown when using the (invalid) combination + * of truncated backpropagation through time (TBPTT) with either a GlobalPoolingLayer or + * LastTimeStepLayer.
It is possible to disable this validation to allow what is almost + * certainly an invalid configuration to be used, however this is not recommended. + * + * @param validate Whether TBPTT validation should be performed + */ + @Getter + @Setter + @lombok.Builder.Default + private boolean validateTbpttConfig = true; + /** + * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam} or + * {@link org.nd4j.linalg.learning.config.Nesterovs}
Note: values set by this method will be + * applied to all applicable layers in the network, unless a different value is explicitly set on + * a given layer. In other words: values set via this method are used as the default value, and + * can be overridden on a per-layer basis. + * + * @param updater Updater to use + */ + @Getter + @Setter + private IUpdater updater; + /** + * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping + * etc. See {@link GradientNormalization} for details
Note: values set by this method will be + * applied to all applicable layers in the network, unless a different value is explicitly set on + * a given layer. In other words: values set via this method are used as the default value, and + * can be overridden on a per-layer basis. + * + * @param gradientNormalization Type of normalization to use. Defaults to None. + * @see GradientNormalization + */ + @Getter + @Setter + @NonNull + @lombok.Builder.Default + private GradientNormalization gradientNormalization = GradientNormalization.None; + /** + * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, + * GradientNormalization.ClipL2PerParamType, and + * GradientNormalization.ClipElementWiseAbsoluteValue
Not used otherwise.
L2 threshold for + * first two types of clipping, or absolute value threshold for last type of clipping.
Note: + * values set by this method will be applied to all applicable layers in the network, unless a + * different value is explicitly set on a given layer. In other words: values set via this method + * are used as the default value, and can be overridden on a per-layer basis. + */ + @Getter + @Setter + private double gradientNormalizationThreshold; + /** + * Activation function / neuron non-linearity
Note: values set by this method will be applied + * to all applicable layers in the network, unless a different value is explicitly set on a given + * layer. In other words: values set via this method are used as the default value, and can be + * overridden on a per-layer basis. + */ + @Getter + @Setter + private IActivation activation; + //whether to constrain the gradient to unit norm or not + @Getter + @Setter + private StepFunction stepFunction; + @Getter + @Setter + @lombok.Builder.Default + private OptimizationAlgorithm optimizationAlgo = OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT; + @Getter + @Setter + @lombok.Builder.Default + private int maxNumLineSearchIterations = 5; + /** + * Set the regularization for the parameters (excluding biases) - for example {@link WeightDecay}
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis.
+ * + * @param regularization Regularization to apply for the network parameters/weights (excluding biases) + */ + @Getter + @lombok.Builder.Default + private List regularization = new ArrayList<>(); + /** + * Set the regularization for the biases only - for example {@link WeightDecay}
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis.
+ * + * @param regularizationBias Regularization to apply for the network biases only + */ + @Getter + @lombok.Builder.Default + private List regularizationBias = new ArrayList<>(); + @Getter + @Setter + @lombok.Builder.Default + private IUpdater iUpdater = new Sgd(); + /** + * Gradient updater configuration, for the biases only. If not set, biases will use the updater as + * set by {@link #setIUpdater(IUpdater)}
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param updater Updater to use for bias parameters + */ + @Getter + @Setter + @lombok.Builder.Default + private IUpdater biasUpdater = null; + @Getter + @Setter + @lombok.Builder.Default + private IActivation activationFn = new ActivationSigmoid(); - /** - * Creates and returns a deep copy of the configuration. - */ - @Override - public NeuralNetConfiguration clone() { + /** + * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. + * See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * @param convolutionMode Convolution mode to use + */ + @Getter + @Setter + @lombok.Builder.Default + private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + /** + * Sets the cuDNN algo mode for convolutional layers, which impacts performance and memory usage of cuDNN. + * See {@link ConvolutionLayer.AlgoMode} for details. Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. + *
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * @param cudnnAlgoMode cuDNN algo mode to use + */ + @Getter + @Setter + @lombok.Builder.Default + private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST; + @Getter + @Setter + @lombok.Builder.Default + private boolean minimize = true; + /** + * Set the dropout for all layers in this network
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis.
+ * Dropout probability. This is the probability of retaining each input activation value for a layer. + * dropOut(x) will keep an input activation with probability x, and set it to 0 with probability 1-x.
+ * dropOut(0.0) is a special value / special case - when set to 0.0, dropout is disabled (not applied). Note + * that a dropout value of 1.0 is functionally equivalent to no dropout: i.e., 100% probability of retaining + * each input activation.
+ *
+ * Note 1: Dropout is applied at training time only - and is automatically not applied at test time + * (for evaluation, etc).
+ * Note 2: This sets the probability per-layer. Care should be taken when setting lower values for + * complex networks (too much information may be lost with aggressive (very low) dropout values).
+ * Note 3: Frequently, dropout is not applied to (or, has a higher retain probability for) input (first layer) + * layers. Dropout is also often not applied to output layers. This needs to be handled MANUALLY by the user + * - set .dropout(0) on those layers when using a global dropout setting.
+ * Note 4: Implementation detail (most users can ignore): DL4J uses inverted dropout, as described here: + * http://cs231n.github.io/neural-networks-2/ + *
+ * + * @param inputRetainProbability Dropout probability (probability of retaining each input activation value for a layer) + * @see #dropOut(IDropout) + * + * @param dropout Dropout, such as {@link Dropout}, {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout}, + * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc + */ + @Getter + @Setter + private IDropout idropOut;
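A brief sketch of the retain-probability semantics described above (builder usage assumed as elsewhere in this file; GaussianDropout is just one example of an IDropout implementation):

NeuralNetConfiguration keepMost = NeuralNetConfiguration.builder()
    .dropOut(0.8)                       // keep each input activation with probability 0.8 at training time
    .build();

NeuralNetConfiguration gaussian = NeuralNetConfiguration.builder()
    .dropOut(new GaussianDropout(0.5))  // or supply an IDropout implementation directly
    .build();

+ /** + * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and + * {@link org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) for the layers in this network.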
+ * Note: values set by this method will be applied to all applicable layers in the network, unless a different + * value is explicitly set on a given layer. In other words: values set via this method are used as the default + * value, and can be overridden on a per-layer basis. + * + * @param weightNoise Weight noise instance to use + */ + @Getter + @Setter + private IWeightNoise weightNoise; + @Getter + @Setter + @lombok.Builder.Default + private double biasInit = 0.0; + @Getter + @Setter + @lombok.Builder.Default + private double gainInit = 1.0; + + /** + * Create a neural net configuration from json + * + * @param json the neural net configuration from json + * @return {@link NeuralNetConfiguration} + */ + public static NeuralNetConfiguration fromJson(String json) { + NeuralNetConfiguration conf; + ObjectMapper mapper = NeuralNetConfiguration.mapper(); + try { + conf = mapper.readValue(json, NeuralNetConfiguration.class); + } catch (InvalidTypeIdException e) { + if (e.getMessage().contains("@class")) { try { - NeuralNetConfiguration clone = (NeuralNetConfiguration) super.clone(); - if (clone.layer != null) - clone.layer = clone.layer.clone(); - if (clone.stepFunction != null) - clone.stepFunction = clone.stepFunction.clone(); - if (clone.variables != null) - clone.variables = new ArrayList<>(clone.variables); - return clone; - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); + //JSON may be legacy (1.0.0-alpha or earlier), attempt to load it using old format + return JsonMappers.getLegacyMapper().readValue(json, NeuralNetConfiguration.class); + } catch (InvalidTypeIdException e2) { + //Check for legacy custom layers: "Could not resolve type id 'CustomLayer' as a subtype of [simple type, class org.deeplearning4j.nn.conf.layers.ILayer]: known type ids = [Bidirectional, CenterLossOutputLayer, CnnLossLayer, ..." + //1.0.0-beta5: dropping support for custom layers defined in pre-1.0.0-beta format. Built-in layers from these formats still work + String msg = e2.getMessage(); + if (msg != null && msg.contains("Could not resolve type id")) { + throw new RuntimeException( + "Error deserializing NeuralNetConfiguration - configuration may have a custom " + + "layer, vertex or preprocessor, in pre version 1.0.0-beta JSON format.\nModels in legacy format with custom" + + + " layers should be loaded in 1.0.0-beta to 1.0.0-beta4 and saved again, before loading in the current version of DL4J", + e); + } + throw new RuntimeException(e2); + } catch (IOException e2) { + throw new RuntimeException(e2); } + } + throw new RuntimeException(e); + } catch (IOException e) { + //Check if this exception came from legacy deserializer... + String msg = e.getMessage(); + if (msg != null && msg.contains("legacy")) { + throw new RuntimeException( + "Error deserializing NeuralNetConfiguration - configuration may have a custom " + + "layer, vertex or preprocessor, in pre version 1.0.0-alpha JSON format. These layers can be " + + + "deserialized by first registering them with NeuralNetConfiguration.registerLegacyCustomClassesForJSON(Class...)", + e); + } + throw new RuntimeException(e); } - public List variables() { - return new ArrayList<>(variables); - } + //To maintain backward compatibility after loss function refactoring (configs generated with v0.5.0 or earlier) + // Previously: enumeration used for loss functions. 
Now: use classes + // IN the past, could have only been an OutputLayer or RnnOutputLayer using these enums + int layerCount = 0; + JsonNode confs = null; + for (LayerConfiguration nnc : conf.getFlattenedLayerConfigurations()) { + LayerConfiguration l = nnc; + if (l instanceof BaseOutputLayer && ((BaseOutputLayer) l).getLossFn() == null) { + //lossFn field null -> may be an old config format, with lossFunction field being for the enum + //if so, try walking the JSON graph to extract out the appropriate enum value - public List variables(boolean copy) { - if (copy) - return variables(); - return variables; - } + BaseOutputLayer ol = (BaseOutputLayer) l; + try { + JsonNode jsonNode = mapper.readTree(json); + if (confs == null) { + confs = jsonNode.get("confs"); + } + if (confs instanceof ArrayNode) { + ArrayNode layerConfs = (ArrayNode) confs; + JsonNode outputLayerNNCNode = layerConfs.get(layerCount); + if (outputLayerNNCNode == null) { + throw new RuntimeException( + "should never happen"); //return conf; //Should never happen... + } + JsonNode outputLayerNode = outputLayerNNCNode.get("layer"); - public void addVariable(String variable) { - if (!variables.contains(variable)) { - variables.add(variable); + JsonNode lossFunctionNode = null; + if (outputLayerNode.has("output")) { + lossFunctionNode = outputLayerNode.get("output").get("lossFunction"); + } else if (outputLayerNode.has("rnnoutput")) { + lossFunctionNode = outputLayerNode.get("rnnoutput").get("lossFunction"); + } + + if (lossFunctionNode != null) { + String lossFunctionEnumStr = lossFunctionNode.asText(); + LossFunctions.LossFunction lossFunction = null; + try { + lossFunction = LossFunctions.LossFunction.valueOf(lossFunctionEnumStr); + } catch (Exception e) { + log.warn( + "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON", + e); + } + + if (lossFunction != null) { + switch (lossFunction) { + case MSE: + ol.setLossFn(new LossMSE()); + break; + case XENT: + ol.setLossFn(new LossBinaryXENT()); + break; + case NEGATIVELOGLIKELIHOOD: + ol.setLossFn(new LossNegativeLogLikelihood()); + break; + case MCXENT: + ol.setLossFn(new LossMCXENT()); + break; + + //Remaining: TODO + case SQUARED_LOSS: + case RECONSTRUCTION_CROSSENTROPY: + default: + log.warn( + "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not set loss function for {}", + lossFunction); + break; + } + } + } + + } else { + log.warn( + "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON: layer 'confs' field is not an ArrayNode (is: {})", + (confs != null ? confs.getClass() : null)); + } + } catch (IOException e) { + log.warn( + "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON", + e); + break; } + } + + //Also, pre 0.7.2: activation functions were Strings ("activationFunction" field), not classes ("activationFn") + //Try to load the old format if necessary, and create the appropriate IActivation instance + if ((l instanceof BaseLayer) && ((BaseLayer) l).getActivationFn() == null) { + try { + JsonNode jsonNode = mapper.readTree(json); + if (confs == null) { + confs = jsonNode.get("confs"); + } + if (confs instanceof ArrayNode) { + ArrayNode layerConfs = (ArrayNode) confs; + JsonNode outputLayerNNCNode = layerConfs.get(layerCount); + if (outputLayerNNCNode == null) { + throw new RuntimeException( + "Should never happen"); //return conf; //Should never happen... 
+ } + JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); + + if (layerWrapperNode == null || layerWrapperNode.size() != 1) { + continue; + } + + JsonNode layerNode = layerWrapperNode.elements().next(); + JsonNode activationFunction = layerNode.get( + "activationFunction"); //Should only have 1 element: "dense", "output", etc + + if (activationFunction != null) { + IActivation ia = Activation.fromString(activationFunction.asText()) + .getActivationFunction(); + ((BaseLayer) l).setActivationFn(ia); + } + } + + } catch (IOException e) { + log.warn( + "ILayer with null ActivationFn field or pre-0.7.2 activation function detected: could not parse JSON", + e); + } + } + + if (!handleLegacyWeightInitFromJson(json, l, mapper, confs, layerCount)) { + return conf; + } + + layerCount++; } + return conf; + } - public void clearVariables() { - variables.clear(); - } + /** + * Handle {@link WeightInit} and {@link Distribution} from legacy configs in Json format. Copied + * from handling of {@link Activation} above. + * + * @return True if all is well and layer iteration shall continue. False else-wise. + */ + private static boolean handleLegacyWeightInitFromJson(String json, LayerConfiguration l, + ObjectMapper mapper, + JsonNode confs, int layerCount) { + if ((l instanceof BaseLayer) && ((BaseLayer) l).getWeightInitFn() == null) { + try { + JsonNode jsonNode = mapper.readTree(json); + if (confs == null) { + confs = jsonNode.get("confs"); + } + if (confs instanceof ArrayNode) { + ArrayNode layerConfs = (ArrayNode) confs; + JsonNode outputLayerNNCNode = layerConfs.get(layerCount); + if (outputLayerNNCNode == null) { + return false; //Should never happen... + } + JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); - /** - * Fluent interface for building a list of configurations - */ - public static class ListBuilder extends MultiLayerConfiguration.Builder { - private int layerCounter = -1; //Used only for .layer(Layer) method - private final Map layerwise; - private final Builder globalConfig; + if (layerWrapperNode == null || layerWrapperNode.size() != 1) { + return true; + } - // Constructor - public ListBuilder(Builder globalConfig, Map layerMap) { - this.globalConfig = globalConfig; - this.layerwise = layerMap; + JsonNode layerNode = layerWrapperNode.elements().next(); + JsonNode weightInit = layerNode.get( + "weightInit"); //Should only have 1 element: "dense", "output", etc + JsonNode distribution = layerNode.get("dist"); + + Distribution dist = null; + if (distribution != null) { + dist = mapper.treeToValue(distribution, Distribution.class); + } + + if (weightInit != null) { + final IWeightInit wi = WeightInit.valueOf(weightInit.asText()) + .getWeightInitFunction(dist); + ((BaseLayer) l).setWeightInitFn(wi); + } } - public ListBuilder(Builder globalConfig) { - this(globalConfig, new HashMap()); + } catch (IOException e) { + log.warn( + "ILayer with null WeightInit detected: " + l.getLayerName() + ", could not parse JSON", + e); + } + } + return true; + + } + + /** + * Object mapper for serialization of configurations + * + * @return + */ + public static ObjectMapper mapperYaml() { + return JsonMappers.getMapperYaml(); + } + + /** + * Object mapper for serialization of configurations + * + * @return + */ + public static ObjectMapper mapper() { + return JsonMappers.getMapper(); + } + + public static NeuralNetConfiguration fromYaml(String input) { + throw new RuntimeException("Needs fixing - not supported."); //TODO + } + + + /** + * @return JSON representation of NN configuration + 
*/ + public String toYaml() { + ObjectMapper mapper = NeuralNetConfiguration.mapperYaml(); + synchronized (mapper) { + try { + return mapper.writeValueAsString(this); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + throw new RuntimeException(e); + } + } + } + + /** + * @return JSON representation of NN configuration + */ + public String toJson() { + ObjectMapper mapper = NeuralNetConfiguration.mapper(); + synchronized (mapper) { + //JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields occasionally + //when writeValueAsString is used by multiple threads. This results in invalid JSON. See issue #3243 + try { + return mapper.writeValueAsString(this); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + log.error(e.getMessage()); + throw new RuntimeException(e); + } + } + } + + @Override + public String toString() { + return toJson(); + } + + @Override + public NeuralNetConfiguration clone() { + NeuralNetConfiguration clone; + clone = (NeuralNetConfiguration) super.clone(); + clone.stepFunction = clone.stepFunction.clone(); + clone.netWideVariables = new ArrayList<>(netWideVariables); + clone.getInnerConfigurations().addAll(innerConfigurations); + + if (clone.getInputPreProcessors() != null) { + Map map = new HashMap<>(); + for (Map.Entry entry : clone.getInputPreProcessors().entrySet()) { + map.put(entry.getKey(), entry.getValue().clone()); + } + clone.getInputPreProcessors().clear(); + clone.getInputPreProcessors().putAll(map); + } + + clone.setInferenceWorkspaceMode(this.inferenceWorkspaceMode); + clone.setTrainingWorkspaceMode(this.trainingWorkspaceMode); + clone.setCacheMode(this.cacheMode); + clone.setValidateOutputLayerConfig(this.validateOutputLayerConfig); + clone.setDataType(this.dataType); + + return clone; + + } + + /** + * + */ + @Override + public void init() { + getNetConfigurations().stream().forEach( conf -> conf.init()); //call init on all embedded configurations + innerConfigurations.add(0, this); //put this configuration at first place + getLayerConfigurations().stream().forEach( lconf -> lconf.setNetConfiguration(this)); //set this as net config for all layers (defined in here, not stacked + + + //Validate BackpropType setting + if ((tbpttBackLength != DEFAULT_TBPTT_LENGTH || tbpttFwdLength != DEFAULT_TBPTT_LENGTH) + && backpropType != BackpropType.TruncatedBPTT) { + log.warn("Truncated backpropagation through time lengths have been configured with values " + + tbpttFwdLength + + " and " + tbpttBackLength + " but backprop type is set to " + backpropType + + ". 
TBPTT configuration" + + " settings will only take effect if backprop type is set to BackpropType.TruncatedBPTT"); + } + + if (backpropType == BackpropType.TruncatedBPTT && validateTbpttConfig) { + //Check for invalid combination - tbptt plus LastTimeStepLayer or + for (int i = 0; i < getFlattenedLayerConfigurations().size(); i++) { + LayerConfiguration l = getFlattenedLayerConfigurations().get(i); + if (l instanceof LastTimeStep || l instanceof GlobalPoolingLayer) { + throw new IllegalStateException( + "Invalid network configuration detected: Truncated backpropagation through time (TBPTT)" + + + " cannot be used with layer " + i + " of type " + l.getClass().getName() + + ": TBPTT is incompatible with this layer type (which is designed " + + "to process entire sequences at once, and does support the type of sequence segments that TPBTT uses).\n" + + + "This check can be disabled using validateTbpttConfig(false) but this is not recommended."); + } + } + } + + if (inputType == null && inputPreProcessors.get(0) == null) { + //User hasn't set the InputType. Sometimes we can infer it... + // For example, Dense/RNN layers, where preprocessor isn't set -> user is *probably* going to feed in + // standard feedforward or RNN data + //This isn't the most elegant implementation, but should avoid breaking backward compatibility here + //Can't infer InputType for CNN layers, however (don't know image dimensions/depth) + LayerConfiguration firstLayer = getFlattenedLayerConfigurations().get(0); + if (firstLayer instanceof BaseRecurrentLayer) { + BaseRecurrentLayer brl = (BaseRecurrentLayer) firstLayer; + val nIn = brl.getNIn(); + if (nIn > 0) { + inputType = InputType.recurrent(nIn, brl.getRnnDataFormat()); + } + } else if (firstLayer instanceof DenseLayer || firstLayer instanceof EmbeddingLayer + || firstLayer instanceof OutputLayer) { + //Can't just use "instanceof FeedForwardLayer" here. ConvolutionLayer is also a FeedForwardLayer + FeedForwardLayer ffl = (FeedForwardLayer) firstLayer; + val nIn = ffl.getNIn(); + if (nIn > 0) { + inputType = InputType.feedForward(nIn); + } + } + } + + //Add preprocessors and set nIns, if InputType has been set + // Builder.inputType field can be set in 1 of 4 ways: + // 1. User calls setInputType directly + // 2. Via ConvolutionLayerSetup -> internally calls setInputType(InputType.convolutional(...)) + // 3. Via the above code: i.e., assume input is as expected by the RNN or dense layer -> sets the inputType field + if(inputPreProcessors == null) { + inputPreProcessors = new HashMap<>(); + } + if (inputType != null) { + InputType currentInputType = inputType; + for (int i = 0; i < getFlattenedLayerConfigurations().size(); i++) { + LayerConfiguration l = getFlattenedLayerConfigurations().get(i); + if (inputPreProcessors.get(i) == null) { + //Don't override preprocessor setting, but set preprocessor if required... 
+ @NonNull + InputPreProcessor inputPreProcessor = l.getPreProcessorForInputType(currentInputType); + if (inputPreProcessor != null) { + inputPreProcessors.put(i, inputPreProcessor); + } } - public ListBuilder layer(int ind, @NonNull Layer layer) { - if (layerwise.containsKey(ind)) { - log.info("Layer index {} already exists, layer of type {} will be replace by layer type {}", - ind, layerwise.get(ind).getClass().getSimpleName(), layer.getClass().getSimpleName()); - layerwise.get(ind).layer(layer); + InputPreProcessor inputPreProcessor = inputPreProcessors.get(i); + if (inputPreProcessor != null) { + currentInputType = inputPreProcessor.getOutputType(currentInputType); + } + if (i > 0) { + LayerConfiguration layer = getFlattenedLayerConfigurations().get(i - 1); + //convolution 1d is an edge case where it has rnn input type but the filters + //should be the output + if (layer instanceof Convolution1DLayer) { + if (l instanceof DenseLayer && inputType instanceof InputType.InputTypeRecurrent) { + FeedForwardLayer feedForwardLayer = (FeedForwardLayer) l; + if (inputType instanceof InputType.InputTypeRecurrent) { + InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType; + feedForwardLayer.setNIn(recurrent.getTimeSeriesLength()); + } } else { - layerwise.put(ind, globalConfig.clone().layer(layer)); + l.setNIn(currentInputType, + overrideNinUponBuild); //Don't override the nIn setting, if it's manually set by the user } - if(layerCounter < ind){ - //Edge case: user is mixing .layer(Layer) and .layer(int, Layer) calls - //This should allow a .layer(A, X) and .layer(Y) to work such that layer Y is index (A+1) - layerCounter = ind; - } - return this; + } else { + l.setNIn(currentInputType, + overrideNinUponBuild); //Don't override the nIn setting, if it's manually set by the user + } + + } else { + l.setNIn(currentInputType, + overrideNinUponBuild); //Don't override the nIn setting, if it's manually set by the user } - public ListBuilder layer(Layer layer){ - return layer(++layerCounter, layer); - } + currentInputType = l.getOutputType(i, currentInputType); + } - public Map getLayerwise() { - return layerwise; - } + } - @Override - public ListBuilder setInputType(InputType inputType){ - return (ListBuilder)super.setInputType(inputType); - } + Nd4j.getRandom().setSeed(getNetConfigurations().get(0).getSeed()); - /** - * A convenience method for setting input types: note that for example .inputType().convolutional(h,w,d) - * is equivalent to .setInputType(InputType.convolutional(h,w,d)) - */ - public ListBuilder.InputTypeBuilder inputType(){ - return new InputTypeBuilder(); - } + //Validate output layer configuration + if (isValidateOutputLayerConfig()) { + //Validate output layer configurations... + for (LayerConfiguration n : getFlattenedLayerConfigurations()) { + OutputLayerUtil.validateOutputLayer(n.getLayerName(), n); //No-op for non output/loss layers + } + } + } - /** - * For the (perhaps partially constructed) network configuration, return a list of activation sizes for each - * layer in the network.
- * Note: To use this method, the network input type must have been set using {@link #setInputType(InputType)} first - * @return A list of activation types for the network, indexed by layer number - */ - public List getLayerActivationTypes(){ - Preconditions.checkState(inputType != null, "Can only calculate activation types if input type has" + - "been set. Use setInputType(InputType)"); + public InputPreProcessor getInputPreProcess(int curr) { + return inputPreProcessors.get(curr); + } - MultiLayerConfiguration conf; - try{ - conf = build(); - } catch (Exception e){ - throw new RuntimeException("Error calculating layer activation types: error instantiating MultiLayerConfiguration", e); - } + /** + * Get a {@link MemoryReport} for the given NeuralNetConfiguration. This is used to estimate the + * memory requirements for the given network configuration and input + * + * @param inputType Input types for the network + * @return Memory report for the network + */ + public NetworkMemoryReport getMemoryReport(InputType inputType) { - return conf.getLayerActivationTypes(inputType); - } + Map memoryReportMap = new LinkedHashMap<>(); + int nLayers = getFlattenedLayerConfigurations().size(); + for (int i = 0; i < nLayers; i++) { + String layerName = getFlattenedLayerConfigurations().get(i).getLayerName(); + if (layerName == null) { + layerName = String.valueOf(i); + } - /** - * Build the multi layer network - * based on this neural network and - * overr ridden parameters - * - * @return the configuration to build - */ - public MultiLayerConfiguration build() { - List list = new ArrayList<>(); - if (layerwise.isEmpty()) - throw new IllegalStateException("Invalid configuration: no layers defined"); - for (int i = 0; i < layerwise.size(); i++) { - if (layerwise.get(i) == null) { - throw new IllegalStateException("Invalid configuration: layer number " + i - + " not specified. Expect layer " + "numbers to be 0 to " + (layerwise.size() - 1) - + " inclusive (number of layers defined: " + layerwise.size() + ")"); - } - if (layerwise.get(i).getLayer() == null) - throw new IllegalStateException("Cannot construct network: Layer config for" + "layer with index " - + i + " is not defined)"); + //Pass input type through preprocessor, if necessary + InputPreProcessor preproc = getInputPreProcess(i); + //TODO memory requirements for preprocessor + if (preproc != null) { + inputType = preproc.getOutputType(inputType); + } - //Layer names: set to default, if not set - if (layerwise.get(i).getLayer().getLayerName() == null) { - layerwise.get(i).getLayer().setLayerName("layer" + i); - } + LayerMemoryReport report = getFlattenedLayerConfigurations().get(i).getMemoryReport(inputType); + memoryReportMap.put(layerName, report); - list.add(layerwise.get(i).build()); - } + inputType = getFlattenedLayerConfigurations().get(i).getOutputType(i, inputType); + } - WorkspaceMode wsmTrain = (globalConfig.setTWM ? globalConfig.trainingWorkspaceMode : trainingWorkspaceMode); - WorkspaceMode wsmTest = (globalConfig.setIWM ? globalConfig.inferenceWorkspaceMode : inferenceWorkspaceMode); + return new NetworkMemoryReport(memoryReportMap, NeuralNetConfiguration.class, + "MultiLayerNetwork", inputType); + } + + /** + * For the given input shape/type for the network, return a list of activation sizes for each + * layer in the network.
i.e., list.get(i) is the output activation sizes for layer i + * + * @param inputType Input type for the network + * @return A lits of activation types for the network, indexed by layer number + */ + public List getLayerActivationTypes(@NonNull InputType inputType) { + List out = new ArrayList<>(); + int nLayers = getFlattenedLayerConfigurations().size(); + for (int i = 0; i < nLayers; i++) { + InputPreProcessor preproc = getInputPreProcess(i); + if (preproc != null) { + inputType = preproc.getOutputType(inputType); + } + + inputType = getFlattenedLayerConfigurations().get(i).getOutputType(i, inputType); + out.add(inputType); + } + return out; + } - return new MultiLayerConfiguration.Builder().inputPreProcessors(inputPreProcessors) - .backpropType(backpropType).tBPTTForwardLength(tbpttFwdLength) - .tBPTTBackwardLength(tbpttBackLength).setInputType(this.inputType) - .trainingWorkspaceMode(wsmTrain).cacheMode(globalConfig.cacheMode) - .inferenceWorkspaceMode(wsmTest).confs(list).validateOutputLayerConfig(validateOutputConfig) - .dataType(globalConfig.dataType) - .build(); - } + public List netWideVariables() { + return netWideVariables; + } - /** Helper class for setting input types */ - public class InputTypeBuilder { + public List netWideVariables(boolean copy) { + if (copy) { + return netWideVariables(); + } + return netWideVariables; + } + + public void addNetWideVariable(String variable) { + if (!netWideVariables.contains(variable)) { + netWideVariables.add(variable); + log.trace("Adding neural network wide variable '{}' to the list of variables. New length is {}.", variable, netWideVariables.size()); + } + log.trace("Skipped adding neural network wide variable '{}' to the list of variables. It was already present. Length remains {}.", variable, netWideVariables.size()); + } + + public void clearNetWideVariable() { + + netWideVariables.clear(); + log.trace("Adding neural network wide variables have been cleared. 
New length is {}.", netWideVariables.size()); + } + + + + /** + * From the list of layers and neural net configurations, only return the Layer Configurations that + * are defined in this neural network (it does not include embedded neural network configuration + * layers) + * @return list with layer configurations + */ + public List getLayerConfigurations() { + return innerConfigurations.stream() + .filter(obj -> (obj instanceof LayerConfiguration)) + .map( obj -> (LayerConfiguration)obj ) + .collect( Collectors.toList()); + } + + /** + * From the list of layers and neural net configurations, only return the neural net configurations + * @return list with neural net configurations + */ + public List getNetConfigurations() { + return innerConfigurations.stream() + .filter(obj -> (obj instanceof NeuralNetConfiguration)) + .map( obj -> (NeuralNetConfiguration)obj ) + .collect( Collectors.toList()); + } + + /** + * From the list of layer configurations and inner neural net configurations, create a single, + * flattened list of layer configurations with inheritance parameters resolved + * + * @return list of layer configurations + */ + public List getFlattenedLayerConfigurations(NeuralNetConfiguration conf) { + List ret = new ArrayList<>(); //create the final return list + for( Object obj : conf.getInnerConfigurations().stream().skip(1) //don't include self + .collect(Collectors.toList())) { + //if Layer Config, include in list and inherit parameters from this conf + //else if neural net configuration, call self recursively to resolve layer configurations + if (obj instanceof LayerConfiguration) + ret.add((LayerConfiguration) obj); + else if (obj instanceof NeuralNetConfiguration) + ret.addAll(getFlattenedLayerConfigurations( + (NeuralNetConfiguration) obj)); + else { + log.error( + "The list of layers and neural network configurations does contain an object of {}. 
Element will be ignored.", + obj.getClass().getSimpleName()); + } + } /** - * See {@link InputType#convolutional(long, long, long)} - */ - public ListBuilder convolutional(int height, int width, int depth){ - return ListBuilder.this.setInputType(InputType.convolutional(height, width, depth)); + LayerConfiguration lc = ((LayerConfiguration) lc).getType().getClazz().cast(obj); + switch(lc.getType()) { + case FC: { //fully connected layer + ((FeedForwardLayer) lc).setWeightInitFn(this.getWeightInitFn()); } + if(lc instanceof FeedForwardLayer && ((FeedForwardLayer) lc).getWeightInitFn() == null) { + **/ + return ret; + } - /** - * * See {@link InputType#convolutionalFlat(long, long, long)} - */ - public ListBuilder convolutionalFlat(int height, int width, int depth){ - return ListBuilder.this.setInputType(InputType.convolutionalFlat(height, width, depth)); - } + /** + * Sames as {@link #getFlattenedLayerConfigurations(NeuralNetConfiguration)}, but uses this configurations + * list of configurations + * @return list of layer configurations + */ + public List getFlattenedLayerConfigurations() { + return getFlattenedLayerConfigurations(this); + } - /** - * See {@link InputType#feedForward(long)} - */ - public ListBuilder feedForward(int size){ - return ListBuilder.this.setInputType(InputType.feedForward(size)); - } - /** - * See {@link InputType#recurrent(long)}} - */ - public ListBuilder recurrent(int size){ - return ListBuilder.this.setInputType(InputType.recurrent(size)); - } - } + /** + * Get the configuration of the first layer + * @return layer configuration + */ + /** + public LayerConfiguration getFirstLayer() { + return getFlattenedLayerConfigurations().get(0); + } +**/ + + /** + * Add a new layer to the first position + * @param layer configuration + */ + public void setLayer(@NonNull LayerConfiguration layer) { + innerConfigurations.add(0, layer); + } + + @Deprecated + public LayerConfiguration getConf(int index) { + return getFlattenedLayerConfigurations().get(index); + } + + public static abstract class NeuralNetConfigurationBuilder> extends + NeuralNetBaseBuilderConfigurationBuilder { + + public ComputationGraphConfiguration.GraphBuilder graphBuilder() { + return new ComputationGraphConfiguration.GraphBuilder(this); } - /** - * Return this configuration as json - * - * @return this configuration represented as json - */ - public String toYaml() { - ObjectMapper mapper = mapperYaml(); - - try { - String ret = mapper.writeValueAsString(this); - return ret; - - } catch (com.fasterxml.jackson.core.JsonProcessingException e) { - throw new RuntimeException(e); - } + public NeuralNetConfigurationBuilder clone() { + try { + return (NeuralNetConfigurationBuilder) super.clone(); + } catch(CloneNotSupportedException ex) { + throw new RuntimeException(ex); + } } - /** - * Create a neural net configuration from json - * - * @param json the neural net configuration from json - * @return - */ - public static NeuralNetConfiguration fromYaml(String json) { - ObjectMapper mapper = mapperYaml(); - try { - NeuralNetConfiguration ret = mapper.readValue(json, NeuralNetConfiguration.class); - return ret; - } catch (IOException e) { - throw new RuntimeException(e); - } - } + } - /** - * Return this configuration as json - * - * @return this configuration represented as json - */ - public String toJson() { - ObjectMapper mapper = mapper(); - - try { - return mapper.writeValueAsString(this); - } catch (com.fasterxml.jackson.core.JsonProcessingException e) { - throw new RuntimeException(e); - } - } - - /** - 
* Create a neural net configuration from json - * - * @param json the neural net configuration from json - * @return - */ - public static NeuralNetConfiguration fromJson(String json) { - ObjectMapper mapper = mapper(); - try { - NeuralNetConfiguration ret = mapper.readValue(json, NeuralNetConfiguration.class); - return ret; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - /** - * Object mapper for serialization of configurations - * - * @return - */ - public static ObjectMapper mapperYaml() { - return JsonMappers.getMapperYaml(); - } - - /** - * Object mapper for serialization of configurations - * - * @return - */ - public static ObjectMapper mapper() { - return JsonMappers.getMapper(); - } - - /** - * NeuralNetConfiguration builder, used as a starting point for creating a MultiLayerConfiguration or - * ComputationGraphConfiguration.
- * Note that values set here on the layer will be applied to all relevant layers - unless the value is overridden - * on a layer's configuration - */ - @Data - public static class Builder implements Cloneable { - protected IActivation activationFn = new ActivationSigmoid(); - protected IWeightInit weightInitFn = new WeightInitXavier(); - protected double biasInit = 0.0; - protected double gainInit = 1.0; - protected List regularization = new ArrayList<>(); - protected List regularizationBias = new ArrayList<>(); - protected IDropout idropOut; - protected IWeightNoise weightNoise; - protected IUpdater iUpdater = new Sgd(); - protected IUpdater biasUpdater = null; - protected Layer layer; - protected boolean miniBatch = true; - protected int maxNumLineSearchIterations = 5; - protected long seed = System.currentTimeMillis(); - protected OptimizationAlgorithm optimizationAlgo = OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT; - protected StepFunction stepFunction = null; - protected boolean minimize = true; - protected GradientNormalization gradientNormalization = GradientNormalization.None; - protected double gradientNormalizationThreshold = 1.0; - protected List allParamConstraints; - protected List weightConstraints; - protected List biasConstraints; - - protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.ENABLED; - protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.ENABLED; - protected boolean setTWM = false; - protected boolean setIWM = false; - protected CacheMode cacheMode = CacheMode.NONE; - protected DataType dataType = DataType.FLOAT; - - protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; - protected ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST; - - public Builder() { - // - } - - public Builder(NeuralNetConfiguration newConf) { - if (newConf != null) { - minimize = newConf.minimize; - maxNumLineSearchIterations = newConf.maxNumLineSearchIterations; - layer = newConf.layer; - optimizationAlgo = newConf.optimizationAlgo; - seed = newConf.seed; - stepFunction = newConf.stepFunction; - miniBatch = newConf.miniBatch; - } - } - - /** - * Process input as minibatch vs full dataset. - * Default set to true. - */ - public Builder miniBatch(boolean miniBatch) { - this.miniBatch = miniBatch; - return this; - } - - /** - * This method defines Workspace mode being used during training:
- * NONE: workspace won't be used
- * ENABLED: workspaces will be used for training (reduced memory and better performance) - * - * @param workspaceMode Workspace mode for training - * @return Builder - */ - public Builder trainingWorkspaceMode(@NonNull WorkspaceMode workspaceMode) { - this.trainingWorkspaceMode = workspaceMode; - this.setTWM = true; - return this; - } - - /** - * This method defines Workspace mode being used during inference:
- * NONE: workspace won't be used
- * ENABLED: workspaces will be used for inference (reduced memory and better performance) - * - * @param workspaceMode Workspace mode for inference - * @return Builder - */ - public Builder inferenceWorkspaceMode(@NonNull WorkspaceMode workspaceMode) { - this.inferenceWorkspaceMode = workspaceMode; - this.setIWM = true; - return this; - } - - /** - * This method defines how/if preOutput cache is handled: - * NONE: cache disabled (default value) - * HOST: Host memory will be used - * DEVICE: GPU memory will be used (on CPU backends effect will be the same as for HOST) - * - * @param cacheMode Cache mode to use - * @return Builder - */ - public Builder cacheMode(@NonNull CacheMode cacheMode) { - this.cacheMode = cacheMode; - return this; - } - - /** - * Objective function to minimize or maximize cost function - * Default set to minimize true. - */ - public Builder minimize(boolean minimize) { - this.minimize = minimize; - return this; - } - - /** - * Maximum number of line search iterations. - * Only applies for line search optimizers: Line Search SGD, Conjugate Gradient, LBFGS - * is NOT applicable for standard SGD - * - * @param maxNumLineSearchIterations > 0 - * @return - */ - public Builder maxNumLineSearchIterations(int maxNumLineSearchIterations) { - this.maxNumLineSearchIterations = maxNumLineSearchIterations; - return this; - } - - - /** - * Layer class. - */ - public Builder layer(Layer layer) { - this.layer = layer; - return this; - } - - /** - * Step function to apply for back track line search. - * Only applies for line search optimizers: Line Search SGD, Conjugate Gradient, LBFGS - * Options: DefaultStepFunction (default), NegativeDefaultStepFunction - * GradientStepFunction (for SGD), NegativeGradientStepFunction - */ - @Deprecated - public Builder stepFunction(StepFunction stepFunction) { - this.stepFunction = stepFunction; - return this; - } - - /** - * Create a ListBuilder (for creating a MultiLayerConfiguration)
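For context, a minimal sketch of how the workspace and cache options documented above were set on the Builder that this patch removes (classic DL4J builder API; the particular modes chosen here are illustrative):

    import org.deeplearning4j.nn.conf.CacheMode;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.WorkspaceMode;

    class WorkspaceExample {
        static NeuralNetConfiguration.Builder workspaces() {
            return new NeuralNetConfiguration.Builder()
                    .trainingWorkspaceMode(WorkspaceMode.ENABLED) // workspaces on for training: less memory, faster
                    .inferenceWorkspaceMode(WorkspaceMode.NONE)   // workspaces off for inference
                    .cacheMode(CacheMode.NONE);                   // preOutput cache disabled (the default)
        }
    }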
- * Usage:
-         * <pre>
-         * {@code .list()
-         * .layer(new DenseLayer.Builder()...build())
-         * ...
-         * .layer(new OutputLayer.Builder()...build())
-         * }
-         * </pre>
- */ - public ListBuilder list() { - return new ListBuilder(this); - } - - /** - * Create a ListBuilder (for creating a MultiLayerConfiguration) with the specified layers
- * Usage:
-         * <pre>
-         * {@code .list(
-         *      new DenseLayer.Builder()...build(),
-         *      ...,
-         *      new OutputLayer.Builder()...build())
-         * }
-         * </pre>
- * - * @param layers The layer configurations for the network - */ - public ListBuilder list(Layer... layers) { - if (layers == null || layers.length == 0) - throw new IllegalArgumentException("Cannot create network with no layers"); - Map layerMap = new HashMap<>(); - for (int i = 0; i < layers.length; i++) { - Builder b = this.clone(); - b.layer(layers[i]); - layerMap.put(i, b); - } - return new ListBuilder(this, layerMap); - - } - - /** - * Create a GraphBuilder (for creating a ComputationGraphConfiguration). - */ - public ComputationGraphConfiguration.GraphBuilder graphBuilder() { - return new ComputationGraphConfiguration.GraphBuilder(this); - } - - /** - * Random number generator seed. Used for reproducability between runs - */ - public Builder seed(long seed) { - this.seed = seed; - Nd4j.getRandom().setSeed(seed); - return this; - } - - /** - * Optimization algorithm to use. Most common: OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT - * - * @param optimizationAlgo Optimization algorithm to use when training - */ - public Builder optimizationAlgo(OptimizationAlgorithm optimizationAlgo) { - this.optimizationAlgo = optimizationAlgo; - return this; - } - - @Override - public Builder clone() { - try { - Builder clone = (Builder) super.clone(); - if (clone.layer != null) - clone.layer = clone.layer.clone(); - if (clone.stepFunction != null) - clone.stepFunction = clone.stepFunction.clone(); - - return clone; - - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - } - - /** - * Activation function / neuron non-linearity
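The {@code .list() ... } snippets in the removed Javadoc above are fragments. For orientation, a compact, self-contained sketch of that removed list()-based workflow, assuming the classic DL4J builder API as it stood before this patch (layer sizes, updater and other hyperparameters are purely illustrative):

    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.deeplearning4j.nn.weights.WeightInit;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.learning.config.Adam;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    class ListBuilderExample {
        static MultiLayerConfiguration build() {
            return new NeuralNetConfiguration.Builder()
                    .seed(42)                      // reproducible runs
                    .updater(new Adam(1e-3))       // default updater for all layers
                    .weightInit(WeightInit.XAVIER) // default weight init for all layers
                    .activation(Activation.RELU)   // default activation, overridable per layer
                    .list()                        // ListBuilder for a MultiLayerConfiguration
                    .layer(new DenseLayer.Builder().nIn(784).nOut(128).build())
                    .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                            .nIn(128).nOut(10).activation(Activation.SOFTMAX).build())
                    .build();
        }
    }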
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @see #activation(Activation) - */ - public Builder activation(IActivation activationFunction) { - this.activationFn = activationFunction; - return this; - } - - /** - * Activation function / neuron non-linearity
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - */ - public Builder activation(Activation activation) { - return activation(activation.getActivationFunction()); - } - - - /** - * Weight initialization scheme to use, for initial weight values - * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @see IWeightInit - */ - public Builder weightInit(IWeightInit weightInit) { - this.weightInitFn = weightInit; - return this; - } - - /** - * Weight initialization scheme to use, for initial weight values - * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @see WeightInit - */ - public Builder weightInit(WeightInit weightInit) { - if(weightInit == WeightInit.DISTRIBUTION) { - // throw new UnsupportedOperationException("Not supported!, Use weightInit(Distribution distribution) instead!"); - } - - this.weightInitFn = weightInit.getWeightInitFunction(); - return this; - } - - /** - * Set weight initialization scheme to random sampling via the specified distribution. - * Equivalent to: {@code .weightInit(new WeightInitDistribution(distribution))} - * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param distribution Distribution to use for weight initialization - */ - public Builder weightInit(Distribution distribution){ - return weightInit(new WeightInitDistribution(distribution)); - } - - /** - * Constant for bias initialization. Default: 0.0
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param biasInit Constant for bias initialization - */ - public Builder biasInit(double biasInit) { - this.biasInit = biasInit; - return this; - } - - /** - * Distribution to sample initial weights from. - * Equivalent to: {@code .weightInit(new WeightInitDistribution(distribution))}.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @see #weightInit(Distribution) - * @deprecated Use {@link #weightInit(Distribution)} - */ - @Deprecated - public Builder dist(Distribution dist) { - return weightInit(dist); - } - - /** - * L1 regularization coefficient for the weights (excluding biases).
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - */ - public Builder l1(double l1) { - //Check if existing L1 exists; if so, replace it - NetworkUtils.removeInstances(this.regularization, L1Regularization.class); - if(l1 > 0.0) { - this.regularization.add(new L1Regularization(l1)); - } - return this; - } - - /** - * L2 regularization coefficient for the weights (excluding biases).
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double)} should be preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
- * Note: L2 regularization and weight decay usually should not be used together; if any weight decay (or L2) has - * been added for the biases, these will be removed first. - * - * @see #weightDecay(double, boolean) - */ - public Builder l2(double l2) { - //Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularization, L2Regularization.class); - if(l2 > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularization, WeightDecay.class, "WeightDecay regularization removed: incompatible with added L2 regularization"); - this.regularization.add(new L2Regularization(l2)); - } - return this; - } - - /** - * L1 regularization coefficient for the bias.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - */ - public Builder l1Bias(double l1Bias) { - NetworkUtils.removeInstances(this.regularizationBias, L1Regularization.class); - if(l1Bias > 0.0) { - this.regularizationBias.add(new L1Regularization(l1Bias)); - } - return this; - } - - /** - * L2 regularization coefficient for the bias.
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double,boolean)} should be preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
- * Note: L2 regularization and weight decay usually should not be used together; if any weight decay (or L2) has - * been added for the biases, these will be removed first. - * - * @see #weightDecayBias(double, boolean) - */ - public Builder l2Bias(double l2Bias) { - NetworkUtils.removeInstances(this.regularizationBias, L2Regularization.class); - if(l2Bias > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularizationBias, WeightDecay.class, "L2 bias regularization removed: incompatible with added WeightDecay regularization"); - this.regularizationBias.add(new L2Regularization(l2Bias)); - } - return this; - } - - /** - * Add weight decay regularization for the network parameters (excluding biases).
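As the notes above say, L2 and weight decay are alternatives: setting one removes a previously set value of the other (with a warning). A minimal sketch of the removed builder calls, assuming the classic API; the coefficients are illustrative only.

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;

    class RegularizationExample {
        static NeuralNetConfiguration.Builder regularized() {
            return new NeuralNetConfiguration.Builder()
                    .l1(1e-5)          // L1 on weights (biases excluded)
                    .weightDecay(1e-4) // preferred over l2(); replaces any previously set l2() value
                    .l2Bias(1e-4);     // separate coefficient for bias parameters only
        }
    }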
- * This applies weight decay with the learning rate multiplied in - see {@link WeightDecay} for more details.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
- * - * @param coefficient Weight decay regularization coefficient - * @see #weightDecay(double, boolean) - */ - public Builder weightDecay(double coefficient) { - return weightDecay(coefficient, true); - } - - /** - * Add weight decay regularization for the network parameters (excluding biases). See {@link WeightDecay} for more details.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
- * - * @param coefficient Weight decay regularization coefficient - * @param applyLR Whether the learning rate should be multiplied in when performing weight decay updates. See {@link WeightDecay} for more details. - * @see #weightDecay(double, boolean) - */ - public Builder weightDecay(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularization, WeightDecay.class); - if(coefficient > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularization, L2Regularization.class, "L2 regularization removed: incompatible with added WeightDecay regularization"); - this.regularization.add(new WeightDecay(coefficient, applyLR)); - } - return this; - } - - /** - * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. - * This applies weight decay with multiplying the learning rate.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
- * - * @param coefficient Weight decay regularization coefficient - * @see #weightDecayBias(double, boolean) - */ - public Builder weightDecayBias(double coefficient) { - return weightDecayBias(coefficient, true); - } - - /** - * Weight decay for the biases only - see {@link #weightDecay(double)} for more details
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
- * - * @param coefficient Weight decay regularization coefficient - */ - public Builder weightDecayBias(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularizationBias, WeightDecay.class); - if(coefficient > 0) { - NetworkUtils.removeInstancesWithWarning(this.regularizationBias, L2Regularization.class, "L2 bias regularization removed: incompatible with added WeightDecay regularization"); - this.regularizationBias.add(new WeightDecay(coefficient, applyLR)); - } - return this; - } - - /** - * Set the regularization for the parameters (excluding biases) - for example {@link WeightDecay}
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
- * - * @param regularization Regularization to apply for the network parameters/weights (excluding biases) - */ - public Builder regularization(List regularization) { - this.regularization = regularization; - return this; - } - - /** - * Set the regularization for the biases only - for example {@link WeightDecay}
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
- * - * @param regularizationBias Regularization to apply for the network biases only - */ - public Builder regularizationBias(List regularizationBias) { - this.regularizationBias = regularizationBias; - return this; - } - - /** - * Dropout probability. This is the probability of retaining each input activation value for a layer. - * dropOut(x) will keep an input activation with probability x, and set to 0 with probability 1-x.
- * dropOut(0.0) is a special value / special case - when set to 0.0, dropout is disabled (not applied). Note - * that a dropout value of 1.0 is functionally equivalent to no dropout: i.e., 100% probability of retaining - * each input activation.
- *

- * Note 1: Dropout is applied at training time only - and is automatically not applied at test time - * (for evaluation, etc)
- * Note 2: This sets the probability per-layer. Care should be taken when setting lower values for - * complex networks (too much information may be lost with aggressive (very low) dropout values).
- * Note 3: Frequently, dropout is not applied to (or, has higher retain probability for) input (first layer) - * layers. Dropout is also often not applied to output layers. This needs to be handled MANUALLY by the user - * - set .dropout(0) on those layers when using global dropout setting.
- * Note 4: Implementation detail (most users can ignore): DL4J uses inverted dropout, as described here: - * http://cs231n.github.io/neural-networks-2/ - *

- *
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param inputRetainProbability Dropout probability (probability of retaining each input activation value for a layer) - * @see #dropOut(IDropout) - */ - public Builder dropOut(double inputRetainProbability) { - if(inputRetainProbability == 0.0){ - return dropOut(null); - } - return dropOut(new Dropout(inputRetainProbability)); - } - - /** - * Set the dropout for all layers in this network
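To make the retain-probability semantics above concrete, a minimal sketch assuming the classic builder API: 0.8 keeps each input activation with probability 0.8, 0.0 disables dropout, and the GaussianNoise call is just one example of an IDropout implementation.

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.dropout.GaussianNoise;

    class DropoutExample {
        static void configure() {
            // Keep each input activation with probability 0.8 (inverted dropout, training only)
            new NeuralNetConfiguration.Builder().dropOut(0.8);

            // Alternative: pass an IDropout implementation directly
            new NeuralNetConfiguration.Builder().dropOut(new GaussianNoise(0.05));
        }
    }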
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param dropout Dropout, such as {@link Dropout}, {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout}, - * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc - * @return - */ - public Builder dropOut(IDropout dropout){ - //Clone: Dropout is stateful usually - don't want to have the same instance shared in multiple places - this.idropOut = (dropout == null ? null : dropout.clone()); - return this; - } - - /** - * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and - * {@link org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) for the layers in this network.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param weightNoise Weight noise instance to use - */ - public Builder weightNoise(IWeightNoise weightNoise){ - this.weightNoise = weightNoise; - return this; - } - - - /** - * @deprecated Use {@link #updater(IUpdater)} - */ - @Deprecated - public Builder updater(Updater updater) { - return updater(updater.getIUpdaterWithDefaultConfig()); - } - - /** - * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam} - * or {@link org.nd4j.linalg.learning.config.Nesterovs}
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param updater Updater to use - */ - public Builder updater(IUpdater updater) { - this.iUpdater = updater; - return this; - } - - /** - * Gradient updater configuration, for the biases only. If not set, biases will use the updater as - * set by {@link #updater(IUpdater)}
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param updater Updater to use for bias parameters - */ - public Builder biasUpdater(IUpdater updater){ - this.biasUpdater = updater; - return this; - } - - /** - * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping etc. - * See {@link GradientNormalization} for details
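A minimal sketch of the updater methods described above, assuming the classic builder API (the Adam and Sgd learning rates are illustrative):

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.nd4j.linalg.learning.config.Adam;
    import org.nd4j.linalg.learning.config.Sgd;

    class UpdaterExample {
        static NeuralNetConfiguration.Builder updaters() {
            return new NeuralNetConfiguration.Builder()
                    .updater(new Adam(1e-3))     // default updater for all parameters
                    .biasUpdater(new Sgd(1e-2)); // biases only; falls back to updater() when unset
        }
    }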
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param gradientNormalization Type of normalization to use. Defaults to None. - * @see GradientNormalization - */ - public Builder gradientNormalization(GradientNormalization gradientNormalization) { - this.gradientNormalization = gradientNormalization; - return this; - } - - /** - * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, - * GradientNormalization.ClipL2PerParamType, and GradientNormalization.ClipElementWiseAbsoluteValue
- * Not used otherwise.
- * L2 threshold for first two types of clipping, or absolute value threshold for last type of clipping.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - */ - public Builder gradientNormalizationThreshold(double threshold) { - this.gradientNormalizationThreshold = threshold; - return this; - } - - /** - * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. - * See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE
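A minimal sketch of the gradient clipping options described above, assuming the classic builder API (the threshold value is illustrative):

    import org.deeplearning4j.nn.conf.GradientNormalization;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;

    class GradientClippingExample {
        static NeuralNetConfiguration.Builder clipped() {
            return new NeuralNetConfiguration.Builder()
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(1.0); // absolute-value threshold for this clipping type
        }
    }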
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * @param convolutionMode Convolution mode to use - */ - public Builder convolutionMode(ConvolutionMode convolutionMode) { - this.convolutionMode = convolutionMode; - return this; - } - - /** - * Sets the cuDNN algo mode for convolutional layers, which impacts performance and memory usage of cuDNN. - * See {@link ConvolutionLayer.AlgoMode} for details. Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. - *
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * @param cudnnAlgoMode cuDNN algo mode to use - */ - public Builder cudnnAlgoMode(ConvolutionLayer.AlgoMode cudnnAlgoMode) { - this.cudnnAlgoMode = cudnnAlgoMode; - return this; - } - - /** - * Set constraints to be applied to all layers. Default: no constraints.
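A minimal sketch of the convolution defaults described above, assuming the classic builder API (the particular mode values are illustrative):

    import org.deeplearning4j.nn.conf.ConvolutionMode;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;

    class ConvDefaultsExample {
        static NeuralNetConfiguration.Builder convDefaults() {
            return new NeuralNetConfiguration.Builder()
                    .convolutionMode(ConvolutionMode.Same)                  // default for conv layers without their own setting
                    .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE); // trades speed for lower cuDNN memory use
        }
    }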
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param constraints Constraints to apply to all parameters of all layers - */ - public Builder constrainAllParameters(LayerConstraint... constraints){ - this.allParamConstraints = Arrays.asList(constraints); - return this; - } - - /** - * Set constraints to be applied to all layers. Default: no constraints.
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param constraints Constraints to apply to all bias parameters of all layers - */ - public Builder constrainBias(LayerConstraint... constraints) { - this.biasConstraints = Arrays.asList(constraints); - return this; - } - - /** - * Set constraints to be applied to all layers. Default: no constraints.
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param constraints Constraints to apply to all weight parameters of all layers - */ - public Builder constrainWeights(LayerConstraint... constraints) { - this.weightConstraints = Arrays.asList(constraints); - return this; - } - - - /** - * Set the DataType for the network parameters and activations. Must be a floating point type: {@link DataType#DOUBLE}, - * {@link DataType#FLOAT} or {@link DataType#HALF}.
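A minimal sketch of the constraint and data-type defaults described above, assuming the classic builder API (the specific constraints and the max-norm value are illustrative):

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.constraint.MaxNormConstraint;
    import org.deeplearning4j.nn.conf.constraint.NonNegativeConstraint;
    import org.nd4j.linalg.api.buffer.DataType;

    class ConstraintExample {
        static NeuralNetConfiguration.Builder constrained() {
            return new NeuralNetConfiguration.Builder()
                    .constrainWeights(new MaxNormConstraint(2.0, 1)) // applied after each parameter update
                    .constrainBias(new NonNegativeConstraint())
                    .dataType(DataType.FLOAT);                       // parameters/activations; must be floating point
        }
    }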
- */ - public Builder dataType(@NonNull DataType dataType){ - Preconditions.checkState(dataType == DataType.DOUBLE || dataType == DataType.FLOAT || dataType == DataType.HALF, - "Data type must be a floating point type: one of DOUBLE, FLOAT, or HALF. Got datatype: %s", dataType); - this.dataType = dataType; - return this; - } - - /** - * Return a configuration based on this builder - * - * @return - */ - public NeuralNetConfiguration build() { - - NeuralNetConfiguration conf = new NeuralNetConfiguration(); - conf.minimize = minimize; - conf.maxNumLineSearchIterations = maxNumLineSearchIterations; - conf.layer = layer; - conf.optimizationAlgo = optimizationAlgo; - conf.seed = seed; - conf.stepFunction = stepFunction; - conf.miniBatch = miniBatch; - conf.cacheMode = this.cacheMode; - conf.dataType = this.dataType; - - configureLayer(layer); - if (layer instanceof FrozenLayer) { - configureLayer(((FrozenLayer) layer).getLayer()); - } - - if (layer instanceof FrozenLayerWithBackprop) { - configureLayer(((FrozenLayerWithBackprop) layer).getUnderlying()); - } - - return conf; - } - - private void configureLayer(Layer layer) { - String layerName; - if (layer == null || layer.getLayerName() == null) - layerName = "Layer not named"; - else - layerName = layer.getLayerName(); - - if(layer instanceof AbstractSameDiffLayer){ - AbstractSameDiffLayer sdl = (AbstractSameDiffLayer)layer; - sdl.applyGlobalConfig(this); - } - - if (layer != null) { - copyConfigToLayer(layerName, layer); - } - - if (layer instanceof FrozenLayer) { - copyConfigToLayer(layerName, ((FrozenLayer) layer).getLayer()); - } - - if (layer instanceof FrozenLayerWithBackprop) { - copyConfigToLayer(layerName, ((FrozenLayerWithBackprop) layer).getUnderlying()); - } - - if (layer instanceof Bidirectional) { - Bidirectional b = (Bidirectional)layer; - copyConfigToLayer(b.getFwd().getLayerName(), b.getFwd()); - copyConfigToLayer(b.getBwd().getLayerName(), b.getBwd()); - } - - if(layer instanceof BaseWrapperLayer){ - BaseWrapperLayer bwr = (BaseWrapperLayer)layer; - configureLayer(bwr.getUnderlying()); - } - - if (layer instanceof ConvolutionLayer) { - ConvolutionLayer cl = (ConvolutionLayer) layer; - if (cl.getConvolutionMode() == null) { - cl.setConvolutionMode(convolutionMode); - } - if (cl.getCudnnAlgoMode() == null) { - cl.setCudnnAlgoMode(cudnnAlgoMode); - } - } - if (layer instanceof SubsamplingLayer) { - SubsamplingLayer sl = (SubsamplingLayer) layer; - if (sl.getConvolutionMode() == null) { - sl.setConvolutionMode(convolutionMode); - } - } - LayerValidation.generalValidation(layerName, layer, idropOut, regularization, regularizationBias, - allParamConstraints, weightConstraints, biasConstraints); - } - - private void copyConfigToLayer(String layerName, Layer layer) { - - if (layer.getIDropout() == null) { - //Dropout is stateful usually - don't want to have the same instance shared by multiple layers - layer.setIDropout(idropOut == null ? 
null : idropOut.clone()); - } - - if (layer instanceof BaseLayer) { - BaseLayer bLayer = (BaseLayer) layer; - if (bLayer.getRegularization() == null || bLayer.getRegularization().isEmpty()) - bLayer.setRegularization(regularization); - if (bLayer.getRegularizationBias() == null || bLayer.getRegularizationBias().isEmpty()) - bLayer.setRegularizationBias(regularizationBias); - if (bLayer.getActivationFn() == null) - bLayer.setActivationFn(activationFn); - if (bLayer.getWeightInitFn() == null) - bLayer.setWeightInitFn(weightInitFn); - if (Double.isNaN(bLayer.getBiasInit())) - bLayer.setBiasInit(biasInit); - if (Double.isNaN(bLayer.getGainInit())) - bLayer.setGainInit(gainInit); - - //Configure weight noise: - if(weightNoise != null && ((BaseLayer) layer).getWeightNoise() == null){ - ((BaseLayer) layer).setWeightNoise(weightNoise.clone()); - } - - //Configure updaters: - if(iUpdater != null && bLayer.getIUpdater() == null){ - bLayer.setIUpdater(iUpdater.clone()); //Clone the updater to avoid shared instances - in case of setLearningRate calls later - } - if(biasUpdater != null && bLayer.getBiasUpdater() == null){ - bLayer.setBiasUpdater(biasUpdater.clone()); //Clone the updater to avoid shared instances - in case of setLearningRate calls later - } - - if(bLayer.getIUpdater() == null && iUpdater == null && bLayer.initializer().numParams(bLayer) > 0){ - //No updater set anywhere - IUpdater u = new Sgd(); - bLayer.setIUpdater(u); - log.warn("*** No updater configuration is set for layer {} - defaulting to {} ***", layerName, u); - } - - if (bLayer.getGradientNormalization() == null) - bLayer.setGradientNormalization(gradientNormalization); - if (Double.isNaN(bLayer.getGradientNormalizationThreshold())) - bLayer.setGradientNormalizationThreshold(gradientNormalizationThreshold); - } - - if (layer instanceof ActivationLayer){ - ActivationLayer al = (ActivationLayer)layer; - if(al.getActivationFn() == null) - al.setActivationFn(activationFn); - } - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java index fafb7a78e..f9a3e81f0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java @@ -53,12 +53,12 @@ public abstract class BaseConstraint implements LayerConstraint { @Override public void applyConstraint(Layer layer, int iteration, int epoch) { - Map paramTable = layer.paramTable(); + Map paramTable = layer.getParamTable(); if(paramTable == null || paramTable.isEmpty() ){ return; } - ParamInitializer i = layer.conf().getLayer().initializer(); + ParamInitializer i = layer.getLayerConfiguration().initializer(); for(Map.Entry e : paramTable.entrySet()){ if(params.contains(e.getKey())){ apply(e.getValue()); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java index 0c7565db1..f93c1619b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java @@ -21,11 +21,13 @@ package org.deeplearning4j.nn.conf.graph; import lombok.Data; +import lombok.Getter; import lombok.NoArgsConstructor; import org.deeplearning4j.nn.conf.InputPreProcessor; 
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InvalidInputTypeException; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; @@ -37,16 +39,18 @@ import java.util.Arrays; @Data public class LayerVertex extends GraphVertex { - private NeuralNetConfiguration layerConf; + private NeuralNetConfiguration netConfiguration; + @Getter + private LayerConfiguration layerConfiguration; private InputPreProcessor preProcessor; //Set outputVertex to true when ILayer is an OutputLayer, OR For use in specialized situations like reinforcement learning - // For RL situations, this ILayer insn't an OutputLayer, but is the last layer in a graph, that gets its error/epsilon + // For RL situations, this ILayer isn't an OutputLayer, but is the last layer in a graph, that gets its error/epsilon // passed in externally private boolean outputVertex; - public LayerVertex(NeuralNetConfiguration layerConf, InputPreProcessor preProcessor) { - this.layerConf = layerConf; + public LayerVertex(NeuralNetConfiguration netConfiguration, InputPreProcessor preProcessor) { + this.netConfiguration = netConfiguration; this.preProcessor = preProcessor; } @@ -56,7 +60,8 @@ public class LayerVertex extends GraphVertex { @Override public GraphVertex clone() { - return new LayerVertex(layerConf.clone(), (preProcessor != null ? preProcessor.clone() : null)); + return new LayerVertex( + netConfiguration.clone(), (preProcessor != null ? preProcessor.clone() : null)); } @Override @@ -64,10 +69,11 @@ public class LayerVertex extends GraphVertex { if (!(o instanceof LayerVertex)) return false; LayerVertex lv = (LayerVertex) o; - if ((layerConf == null && lv.layerConf != null) || (layerConf != null && lv.layerConf == null)) { + if ((netConfiguration == null && lv.netConfiguration != null) || (netConfiguration != null && lv.netConfiguration + == null)) { return false; } - if (layerConf != null && !layerConf.equals(lv.layerConf)) + if (netConfiguration != null && !netConfiguration.equals(lv.netConfiguration)) return false; if (preProcessor == null && lv.preProcessor != null || preProcessor != null && lv.preProcessor == null) return false; @@ -76,12 +82,12 @@ public class LayerVertex extends GraphVertex { @Override public int hashCode() { - return layerConf.hashCode() ^ (preProcessor != null ? preProcessor.hashCode() : 0); + return netConfiguration.hashCode() ^ (preProcessor != null ? preProcessor.hashCode() : 0); } @Override public long numParams(boolean backprop) { - return layerConf.getLayer().initializer().numParams(layerConf); + return layerConfiguration.initializer().numParams(layerConfiguration); } @Override @@ -99,13 +105,13 @@ public class LayerVertex extends GraphVertex { INDArray paramsView, boolean initializeParams, DataType networkDatatype) { //Now, we need to work out if this vertex is an output vertex or not... 
boolean isOutput = graph.getComputationGraphConfiguration().getNetworkOutputs().contains(name); - + this.layerConfiguration = graph.getLayer(idx).getLayerConfiguration(); org.deeplearning4j.nn.api.Layer layer = - layerConf.getLayer().instantiate(layerConf, null, idx, paramsView, initializeParams, networkDatatype); + layerConfiguration.instantiate(netConfiguration, null, idx, paramsView, initializeParams, networkDatatype); if(layer == null) { throw new IllegalStateException("Encountered null layer during initialization for layer:" + - layerConf.getLayer().getClass().getSimpleName() + " initialization returned null layer?"); + layerConfiguration.getClass().getSimpleName() + " initialization returned null layer?"); } return new org.deeplearning4j.nn.graph.vertex.impl.LayerVertex(graph, name, idx, layer, preProcessor, isOutput, networkDatatype); @@ -125,7 +131,7 @@ public class LayerVertex extends GraphVertex { else afterPreprocessor = preProcessor.getOutputType(vertexInputs[0]); - InputType ret = layerConf.getLayer().getOutputType(layerIndex, afterPreprocessor); + InputType ret = layerConfiguration.getOutputType(layerIndex, afterPreprocessor); return ret; } @@ -142,11 +148,13 @@ public class LayerVertex extends GraphVertex { it = inputTypes[0]; } //TODO preprocessor memory - return layerConf.getLayer().getMemoryReport(it); + return layerConfiguration.getMemoryReport(it); } @Override public void setDataType(DataType dataType){ - layerConf.getLayer().setDataType(dataType); + layerConfiguration.setDataType(dataType); } + + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java index 0b10cedd4..378ae01a2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -48,6 +49,7 @@ public class ActivationLayer extends NoParamLayer { protected ActivationLayer(Builder builder) { super(builder); + setType(LayerType.ACT); this.activationFn = builder.activationFn; initializeConstraints(builder); } @@ -75,13 +77,16 @@ public class ActivationLayer extends NoParamLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - org.deeplearning4j.nn.layers.ActivationLayer ret = new org.deeplearning4j.nn.layers.ActivationLayer(conf, networkDataType); + this.setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + + org.deeplearning4j.nn.layers.ActivationLayer ret = new org.deeplearning4j.nn.layers.ActivationLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -126,7 +131,7 @@ public class ActivationLayer extends NoParamLayer { 
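The same instantiate() rewrite recurs in every layer class touched below (AutoEncoder, BatchNormalization, the convolution layers, DenseLayer, and so on). The common shape of the change, read off the hunks in this patch, is roughly the following schematic; SomeLayerImpl stands for the concrete implementation class and the snippet is not a compilable method on its own:

    // Common pattern introduced by this patch in each LayerConfiguration.instantiate(...) override
    public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
                             int layerIndex, INDArray layerParamsView, boolean initializeParams,
                             DataType networkDataType) {
        setNetConfiguration(conf);                      // the layer config now keeps a reference to the net config
        LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
        SomeLayerImpl ret = new SomeLayerImpl(lconf, networkDataType); // impl constructors now take the layer config
        ret.setListeners(trainingListeners);
        ret.setIndex(layerIndex);
        ret.setParamsViewArray(layerParamsView);
        Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams); // init from the layer config
        ret.setParamTable(paramTable);
        ret.setLayerConfiguration(lconf);               // replaces the old setConf(conf)
        return ret;
    }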
@NoArgsConstructor @Getter @Setter - public static class Builder extends org.deeplearning4j.nn.conf.layers.Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { /** * Activation function for the layer diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java index 09f14e034..311359f7f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java @@ -55,14 +55,17 @@ public class AutoEncoder extends BasePretrainNetwork { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + this.setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.feedforward.autoencoder.AutoEncoder ret = - new org.deeplearning4j.nn.layers.feedforward.autoencoder.AutoEncoder(conf, networkDataType); + new org.deeplearning4j.nn.layers.feedforward.autoencoder.AutoEncoder(lconf, networkDataType); + ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(conf.getFlattenedLayerConfigurations().get(layerIndex)); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java index 6aad5b0ef..bf30e0f7a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayer.java @@ -47,9 +47,10 @@ import java.util.List; @Data @EqualsAndHashCode(callSuper = true) @NoArgsConstructor -public abstract class BaseLayer extends Layer implements Serializable, Cloneable { +public abstract class BaseLayer extends LayerConfiguration implements Serializable, Cloneable { protected IActivation activationFn; + @NonNull protected IWeightInit weightInitFn; protected double biasInit; protected double gainInit; @@ -153,7 +154,7 @@ public abstract class BaseLayer extends Layer implements Serializable, Cloneable @SuppressWarnings("unchecked") @Getter @Setter - public abstract static class Builder> extends Layer.Builder { + public abstract static class Builder> extends LayerConfiguration.Builder { /** * Set the activation function for the layer. 
This overload can be used for custom {@link IActivation} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java index b92ad390f..07220f89e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java @@ -21,10 +21,8 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; -import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.params.EmptyParamInitializer; /** * Upsampling base layer @@ -64,7 +62,7 @@ public abstract class BaseUpsamplingLayer extends NoParamLayer { @NoArgsConstructor @Getter @Setter - protected static abstract class UpsamplingBuilder> extends Layer.Builder { + protected static abstract class UpsamplingBuilder> extends LayerConfiguration.Builder { /** * An int array to specify upsampling dimensions, the length of which has to equal to the number of spatial diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java index 2dd228b0e..68e3a0851 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -64,6 +65,7 @@ public class BatchNormalization extends FeedForwardLayer { private BatchNormalization(Builder builder) { super(builder); + this.setType(LayerType.BN); this.decay = builder.decay; this.eps = builder.eps; this.isMinibatch = builder.isMinibatch; @@ -89,16 +91,18 @@ public class BatchNormalization extends FeedForwardLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNOutSet("BatchNormalization", getLayerName(), layerIndex, getNOut()); + this.setNetConfiguration(conf); + LayerValidation.assertNOutSet("BatchNormalization", getLayerName(), layerIndex, getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.normalization.BatchNormalization ret = - new org.deeplearning4j.nn.layers.normalization.BatchNormalization(conf, networkDataType); + new org.deeplearning4j.nn.layers.normalization.BatchNormalization(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java index 4081930c9..05d32dc56 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java @@ -211,7 +211,7 @@ public class CapsuleLayer extends SameDiffLayer { } @Override - public E build() { + public E build() { return (E) new CapsuleLayer(this); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java index bd75b863e..e702b2de1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java @@ -59,7 +59,7 @@ public class CapsuleStrengthLayer extends SameDiffLambdaLayer { public static class Builder extends SameDiffLambdaLayer.Builder{ @Override - public E build() { + public E build() { return (E) new CapsuleStrengthLayer(this); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java index 820d73d5d..a25a10947 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java @@ -61,15 +61,17 @@ public class CenterLossOutputLayer extends BaseOutputLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + setNetConfiguration(conf); LayerValidation.assertNInNOutSet("CenterLossOutputLayer", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - Layer ret = new org.deeplearning4j.nn.layers.training.CenterLossOutputLayer(conf, networkDataType); - ret.setListeners(trainingListeners); + Layer ret = new org.deeplearning4j.nn.layers.training.CenterLossOutputLayer(lconf, networkDataType); + ret.setListeners(trainingListeners.toArray(new TrainingListener[]{})); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java index 774397ede..5c3cede7e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java @@ -56,14 +56,16 @@ public class Cnn3DLossLayer extends FeedForwardLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + setNetConfiguration(conf); + LayerConfiguration lconf = 
conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer ret = - new org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java index 0b31dd703..bcad7fb65 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java @@ -61,14 +61,16 @@ public class CnnLossLayer extends FeedForwardLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.convolution.CnnLossLayer ret = - new org.deeplearning4j.nn.layers.convolution.CnnLossLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.CnnLossLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java index 1bd0e5172..eeb023374 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java @@ -64,16 +64,17 @@ public class Convolution1DLayer extends ConvolutionLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + setNetConfiguration(conf); LayerValidation.assertNInNOutSet("Convolution1DLayer", getLayerName(), layerIndex, getNIn(), getNOut()); - + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.convolution.Convolution1DLayer ret = - new org.deeplearning4j.nn.layers.convolution.Convolution1DLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.Convolution1DLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - 
ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java index f012b0008..28a03ed4e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java @@ -97,13 +97,15 @@ public class Convolution3D extends ConvolutionLayer { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("Convolution3D", getLayerName(), layerIndex, getNIn(), getNOut()); - Convolution3DLayer ret = new Convolution3DLayer(conf, networkDataType); + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + Convolution3DLayer ret = new Convolution3DLayer(lconf, networkDataType); ret.setListeners(iterationListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java index ae26e62f0..a09d33506 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.*; @@ -113,6 +114,7 @@ public class ConvolutionLayer extends FeedForwardLayer { */ protected ConvolutionLayer(BaseConvBuilder builder) { super(builder); + this.setType(LayerType.CONV); int dim = builder.convolutionDim; this.hasBias = builder.hasBias; @@ -168,16 +170,19 @@ public class ConvolutionLayer extends FeedForwardLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + LayerValidation.assertNInNOutSet("ConvolutionLayer", getLayerName(), layerIndex, getNIn(), getNOut()); org.deeplearning4j.nn.layers.convolution.ConvolutionLayer ret = - new org.deeplearning4j.nn.layers.convolution.ConvolutionLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.ConvolutionLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java index e4f789ab7..d5b113b7f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java @@ -81,16 +81,19 @@ public class Deconvolution2D extends ConvolutionLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + setNetConfiguration(conf); LayerValidation.assertNInNOutSet("Deconvolution2D", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer ret = - new org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer(lconf, networkDataType); + ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java index 9f96b25da..99ed3137b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java @@ -30,10 +30,8 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer; import org.deeplearning4j.nn.layers.convolution.Deconvolution3DLayer; import org.deeplearning4j.nn.params.Deconvolution3DParamInitializer; -import org.deeplearning4j.nn.params.DeconvolutionParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; @@ -84,15 +82,15 @@ public class Deconvolution3D extends ConvolutionLayer { public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("Deconvolution2D", getLayerName(), layerIndex, getNIn(), getNOut()); - + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); Deconvolution3DLayer ret = - new Deconvolution3DLayer(conf, networkDataType); + new Deconvolution3DLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java index 1a6ce905c..fce42e8e5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java @@ -28,6 +28,7 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.DefaultParamInitializer; +import org.deeplearning4j.nn.weights.WeightInitXavier; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -35,6 +36,10 @@ import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collection; import java.util.Map; +/** + * Dense Layer + * Uses WeightInitXavier as default + */ @Data @NoArgsConstructor @ToString(callSuper = true) @@ -55,16 +60,20 @@ public class DenseLayer extends FeedForwardLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerValidation.assertNInNOutSet("DenseLayerConfiguration", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret = - new org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer(lconf, networkDataType); + if(getWeightInitFn() == null) setWeightInitFn(new WeightInitXavier()); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); + return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java index d412c7158..52eb89ecf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java @@ -68,13 +68,15 @@ public class DepthwiseConvolution2D extends ConvolutionLayer { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("DepthwiseConvolution2D", getLayerName(), layerIndex, getNIn(), getNOut()); - DepthwiseConvolution2DLayer ret = new DepthwiseConvolution2DLayer(conf, networkDataType); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + DepthwiseConvolution2DLayer ret = new DepthwiseConvolution2DLayer(lconf, networkDataType); + ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + 
ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java index fa20692be..573b6c617 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -46,7 +47,9 @@ import java.util.Map; public class DropoutLayer extends FeedForwardLayer { private DropoutLayer(Builder builder) { + super(builder); + setType(LayerType.DO); } public DropoutLayer(double activationRetainProb){ @@ -66,13 +69,17 @@ public class DropoutLayer extends FeedForwardLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - org.deeplearning4j.nn.layers.DropoutLayer ret = new org.deeplearning4j.nn.layers.DropoutLayer(conf, networkDataType); + setNetConfiguration(conf); + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.DropoutLayer ret = new org.deeplearning4j.nn.layers.DropoutLayer(lconf, networkDataType); + ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java index 67199aa64..3ef26352b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java @@ -27,7 +27,6 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; -import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.nn.params.EmbeddingLayerParamInitializer; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.embeddings.ArrayEmbeddingInitializer; @@ -58,14 +57,16 @@ public class EmbeddingLayer extends FeedForwardLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer ret = - new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer(lconf, networkDataType); + ret.setListeners(trainingListeners); 
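The hunks in this stretch of the patch all repeat one refactoring of instantiate(): the runtime layer is constructed from the per-layer LayerConfiguration (looked up via conf.getFlattenedLayerConfigurations().get(layerIndex)) rather than from the whole NeuralNetConfiguration, the parameter table is initialized from the layer configuration itself, and setConf(conf) becomes setLayerConfiguration(lconf). A minimal illustrative sketch of that pattern follows; it is not a hunk from this patch, MyLayer is a hypothetical runtime class, and only calls visible in the surrounding hunks are used:

    // Illustrative sketch only; MyLayer is hypothetical, the other calls mirror the hunks in this patch.
    @Override
    public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
            Collection<TrainingListener> trainingListeners, int layerIndex,
            INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
        setNetConfiguration(conf);                            // remember the owning network configuration
        LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
        MyLayer ret = new MyLayer(lconf, networkDataType);    // runtime layer now takes the layer configuration
        ret.setListeners(trainingListeners);
        ret.setIndex(layerIndex);
        ret.setParamsViewArray(layerParamsView);
        Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
        ret.setParamTable(paramTable);
        ret.setLayerConfiguration(lconf);                     // replaces the former ret.setConf(conf)
        return ret;
    }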
ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java index ea7b4e6bf..133b0b6c1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java @@ -65,14 +65,16 @@ public class EmbeddingSequenceLayer extends FeedForwardLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingSequenceLayer ret = - new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingSequenceLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingSequenceLayer(lconf, networkDataType); + ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java index 8e8fd62a3..3728e55bb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.conf.DataFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -43,9 +44,11 @@ public abstract class FeedForwardLayer extends BaseLayer { super(builder); this.nIn = builder.nIn; this.nOut = builder.nOut; + setType(LayerType.FC); } + @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || (inputType.getType() != InputType.Type.FF diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java index cdf92720a..1cd9e6c91 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java @@ -69,14 +69,16 @@ public class GlobalPoolingLayer extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + 
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.pooling.GlobalPoolingLayer ret = - new org.deeplearning4j.nn.layers.pooling.GlobalPoolingLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.pooling.GlobalPoolingLayer(lconf, networkDataType); + ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -189,7 +191,7 @@ public class GlobalPoolingLayer extends NoParamLayer { @Getter @Setter - public static class Builder extends Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { /** * Pooling type for global pooling diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java index 76a943509..ac6242e9a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java @@ -59,7 +59,7 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer { } @Override - protected void initializeConstraints(org.deeplearning4j.nn.conf.layers.Layer.Builder builder) { + protected void initializeConstraints(LayerConfiguration.Builder builder) { super.initializeConstraints(builder); if (((Builder) builder).recurrentConstraints != null) { if (constraints == null) { @@ -79,14 +79,16 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.recurrent.GravesBidirectionalLSTM ret = - new org.deeplearning4j.nn.layers.recurrent.GravesBidirectionalLSTM(conf, networkDataType); + new org.deeplearning4j.nn.layers.recurrent.GravesBidirectionalLSTM(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java index e12d6df22..bb84cedae 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java @@ -59,7 +59,7 @@ public class GravesLSTM extends AbstractLSTM { } @Override - protected void initializeConstraints(org.deeplearning4j.nn.conf.layers.Layer.Builder builder) { + protected void initializeConstraints(LayerConfiguration.Builder builder) { super.initializeConstraints(builder); if (((Builder) builder).recurrentConstraints != null) { if (constraints == null) 
{ @@ -77,14 +77,16 @@ public class GravesLSTM extends AbstractLSTM { public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("GravesLSTM", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.recurrent.GravesLSTM ret = - new org.deeplearning4j.nn.layers.recurrent.GravesLSTM(conf, networkDataType); + new org.deeplearning4j.nn.layers.recurrent.GravesLSTM(lconf, networkDataType); + ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java index 0f0d61fc3..8474d3089 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java @@ -57,7 +57,7 @@ public class LSTM extends AbstractLSTM { } @Override - protected void initializeConstraints(org.deeplearning4j.nn.conf.layers.Layer.Builder builder) { + protected void initializeConstraints(LayerConfiguration.Builder builder) { super.initializeConstraints(builder); if (((Builder) builder).recurrentConstraints != null) { if (constraints == null) { @@ -75,13 +75,14 @@ public class LSTM extends AbstractLSTM { public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("LSTM", getLayerName(), layerIndex, getNIn(), getNOut()); - org.deeplearning4j.nn.layers.recurrent.LSTM ret = new org.deeplearning4j.nn.layers.recurrent.LSTM(conf, networkDataType); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.recurrent.LSTM ret = new org.deeplearning4j.nn.layers.recurrent.LSTM(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java similarity index 90% rename from cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java rename to cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java index 66f48dd14..a41870c3d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Layer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java @@ -20,10 +20,20 @@ package org.deeplearning4j.nn.conf.layers; +import com.fasterxml.jackson.annotation.JsonTypeInfo; 
+import java.io.Serializable; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; import lombok.Data; import lombok.Getter; import lombok.NoArgsConstructor; +import lombok.NonNull; import lombok.Setter; +import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.TrainingConfig; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -34,35 +44,49 @@ import org.deeplearning4j.nn.conf.dropout.IDropout; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; -import com.fasterxml.jackson.annotation.JsonTypeInfo; - -import java.io.Serializable; -import java.lang.reflect.Field; -import java.util.*; /** * A neural network layer. + * */ @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") @Data @NoArgsConstructor -public abstract class Layer implements TrainingConfig, Serializable, Cloneable { + +public abstract class LayerConfiguration implements TrainingConfig, Serializable, Cloneable { protected String layerName; + @Getter + protected List variables = new ArrayList<>(); + public void addVariable(String s) {variables.add(s);} + protected IDropout iDropout; protected List constraints; + /** + * The type of the layer, basically defines the base class and its properties + */ + @Getter @Setter @NonNull + private LayerType type = LayerType.UNKNOWN; - public Layer(Builder builder) { + @Getter @Setter + private NeuralNetConfiguration netConfiguration; + + public LayerConfiguration(Builder builder) { this.layerName = builder.layerName; this.iDropout = builder.iDropout; } + public String toJson() { + throw new RuntimeException("toJson is not implemented for LayerConfiguration"); + } + /** * Initialize the weight constraints. Should be called last, in the outer-most constructor */ @@ -113,10 +137,19 @@ public abstract class Layer implements TrainingConfig, Serializable, Cloneable { this.constraints = null; } + /** + * Migration workaround //TODO To be removed + * + * @return a layer configuration + */ + @Deprecated + public LayerConfiguration getLayer() { + return this; + } @Override - public Layer clone() { + public LayerConfiguration clone() { try { - Layer ret = (Layer) super.clone(); + LayerConfiguration ret = (LayerConfiguration) super.clone(); //Let's check for any INDArray fields and dup them (in case cloned layer will be used in different threads on CUDA... 
// we don't want it being relocated contantly between devices) Class c = getClass(); @@ -150,7 +183,7 @@ public abstract class Layer implements TrainingConfig, Serializable, Cloneable { } } - public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, + public abstract org.deeplearning4j.nn.api.Layer instantiate( @NonNull NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType); @@ -239,7 +272,14 @@ public abstract class Layer implements TrainingConfig, Serializable, Cloneable { */ public abstract LayerMemoryReport getMemoryReport(InputType inputType); - @SuppressWarnings("unchecked") + public void clearVariables() { + this.variables.clear(); + } + + @Getter + public IActivation activationFn; + + @SuppressWarnings("unchecked") @Getter @Setter public abstract static class Builder> { @@ -344,6 +384,6 @@ public abstract class Layer implements TrainingConfig, Serializable, Cloneable { return (T) this; } - public abstract E build(); + public abstract E build(); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java index 571f884e3..a125d4ffc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java @@ -74,7 +74,7 @@ public class LayerValidation { } } - public static void generalValidation(String layerName, Layer layer, IDropout iDropout, List regularization, + public static void generalValidation(String layerName, LayerConfiguration layer, IDropout iDropout, List regularization, List regularizationBias, List allParamConstraints, List weightConstraints, List biasConstraints) { @@ -82,8 +82,8 @@ public class LayerValidation { if (layer instanceof BaseLayer) { BaseLayer bLayer = (BaseLayer) layer; configureBaseLayer(layerName, bLayer, iDropout, regularization, regularizationBias); - } else if (layer instanceof FrozenLayer && ((FrozenLayer) layer).getLayer() instanceof BaseLayer) { - BaseLayer bLayer = (BaseLayer) ((FrozenLayer) layer).getLayer(); + } else if (layer instanceof FrozenLayer && ((FrozenLayer) layer).getInnerConfiguration() instanceof BaseLayer) { + BaseLayer bLayer = (BaseLayer) ((FrozenLayer) layer).getInnerConfiguration(); configureBaseLayer(layerName, bLayer, iDropout, regularization, regularizationBias); } else if (layer instanceof Bidirectional) { Bidirectional l = (Bidirectional) layer; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java index 98d7fa093..77483640c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java @@ -44,7 +44,7 @@ import java.util.Map; @NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -public class LocalResponseNormalization extends Layer { +public class LocalResponseNormalization extends LayerConfiguration { // Defaults as per http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf //Set defaults here as well as in builder, in case users use no-arg constructor instead of builder @@ 
-75,14 +75,16 @@ public class LocalResponseNormalization extends Layer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization ret = - new org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization(conf, networkDataType); + new org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization(lconf, networkDataType); + ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -154,7 +156,7 @@ public class LocalResponseNormalization extends Layer { @AllArgsConstructor @Getter @Setter - public static class Builder extends Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { // defaults based on AlexNet model @@ -275,7 +277,7 @@ public class LocalResponseNormalization extends Layer { * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). * See {@link CNN2DFormat} for more details.
* Default: NCHW - * @param format Format for activations (in and out) + * @param dataFormat Format for activations (in and out) */ public Builder dataFormat(CNN2DFormat dataFormat){ this.dataFormat = dataFormat; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java index 921d0f9ea..2a8afacb7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java @@ -212,12 +212,13 @@ public class LocallyConnected1D extends SameDiffLayer { } @Override - public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { + public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { + NeuralNetConfiguration global_conf = globalConfig.build(); if (activation == null) { - activation = SameDiffLayerUtils.fromIActivation(globalConfig.getActivationFn()); + activation = SameDiffLayerUtils.fromIActivation(global_conf.getActivationFn()); } if (cm == null) { - cm = globalConfig.getConvolutionMode(); + cm = global_conf.getConvolutionMode(); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java index b44055332..a33445ce7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java @@ -229,12 +229,13 @@ public class LocallyConnected2D extends SameDiffLayer { } @Override - public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { + public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { + NeuralNetConfiguration gconf = globalConfig.build(); if (activation == null) { - activation = SameDiffLayerUtils.fromIActivation(globalConfig.getActivationFn()); + activation = SameDiffLayerUtils.fromIActivation(gconf.getActivationFn()); } if (cm == null) { - cm = globalConfig.getConvolutionMode(); + cm = gconf.getConvolutionMode(); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java index e88a66298..2e89f7ee7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java @@ -57,13 +57,15 @@ public class LossLayer extends FeedForwardLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - org.deeplearning4j.nn.layers.LossLayer ret = new org.deeplearning4j.nn.layers.LossLayer(conf, networkDataType); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.LossLayer ret = new org.deeplearning4j.nn.layers.LossLayer(lconf, networkDataType); + ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = 
initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java index 227650a5f..7d0c181f8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.NoArgsConstructor; +import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -30,10 +31,12 @@ import org.nd4j.linalg.learning.regularization.Regularization; import java.util.List; @NoArgsConstructor -public abstract class NoParamLayer extends Layer { +public abstract class NoParamLayer extends LayerConfiguration { protected NoParamLayer(Builder builder) { + super(builder); + setType(LayerType.POOL); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java index d31ff854a..2616ed8d9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java @@ -53,14 +53,15 @@ public class OutputLayer extends BaseOutputLayer { public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("OutputLayer", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - org.deeplearning4j.nn.layers.OutputLayer ret = new org.deeplearning4j.nn.layers.OutputLayer(conf, networkDataType); + org.deeplearning4j.nn.layers.OutputLayer ret = new org.deeplearning4j.nn.layers.OutputLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java index 289009ad7..e44f7f709 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java @@ -59,13 +59,14 @@ public class PReLULayer extends BaseLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - org.deeplearning4j.nn.layers.feedforward.PReLU ret = new org.deeplearning4j.nn.layers.feedforward.PReLU(conf, networkDataType); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + 
org.deeplearning4j.nn.layers.feedforward.PReLU ret = new org.deeplearning4j.nn.layers.feedforward.PReLU(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java index 4d3f56a84..fc0c256f7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java @@ -417,7 +417,7 @@ public class PrimaryCapsules extends SameDiffLayer { } @Override - public E build() { + public E build() { return (E) new PrimaryCapsules(this); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java index 161acc44e..10924fd90 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java @@ -150,9 +150,9 @@ public class RecurrentAttentionLayer extends SameDiffLayer { } @Override - public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { + public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { if (activation == null) { - activation = SameDiffLayerUtils.fromIActivation(globalConfig.getActivationFn()); + activation = SameDiffLayerUtils.fromIActivation(globalConfig.build().getActivationFn()); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java index 376886cc4..1127d0be0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java @@ -59,14 +59,17 @@ public class RnnLossLayer extends FeedForwardLayer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.recurrent.RnnLossLayer ret = - new org.deeplearning4j.nn.layers.recurrent.RnnLossLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.recurrent.RnnLossLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java index 9f17c2cee..629e70da6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java @@ -59,15 +59,16 @@ public class RnnOutputLayer extends BaseOutputLayer { public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("RnnOutputLayer", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer ret = - new org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java index a6efb86b1..34bc03086 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java @@ -79,7 +79,7 @@ public class SeparableConvolution2D extends ConvolutionLayer { } @Override - protected void initializeConstraints(org.deeplearning4j.nn.conf.layers.Layer.Builder builder) { + protected void initializeConstraints(LayerConfiguration.Builder builder) { super.initializeConstraints(builder); if (((Builder) builder).pointWiseConstraints != null) { if (constraints == null) { @@ -117,15 +117,16 @@ public class SeparableConvolution2D extends ConvolutionLayer { public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("SeparableConvolution2D", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.convolution.SeparableConvolution2DLayer ret = - new org.deeplearning4j.nn.layers.convolution.SeparableConvolution2DLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.SeparableConvolution2DLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java index b8de7a4e4..ff4082075 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java @@ -67,14 +67,16 @@ public class SpaceToBatchLayer extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.SpaceToBatch ret = - new org.deeplearning4j.nn.layers.convolution.SpaceToBatch(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.SpaceToBatch(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -130,7 +132,7 @@ public class SpaceToBatchLayer extends NoParamLayer { @NoArgsConstructor @Getter @Setter - public static class Builder> extends Layer.Builder { + public static class Builder> extends LayerConfiguration.Builder { /** * Block size for SpaceToBatch layer. Should be a length 2 array for the height and width diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java index b35092359..110d127b0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java @@ -73,14 +73,16 @@ public class SpaceToDepthLayer extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.SpaceToDepth ret = - new org.deeplearning4j.nn.layers.convolution.SpaceToDepth(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.SpaceToDepth(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -134,7 +136,7 @@ public class SpaceToDepthLayer extends NoParamLayer { @NoArgsConstructor @Getter @Setter - public static class Builder> extends Layer.Builder { + public static class Builder> extends LayerConfiguration.Builder { protected int blockSize; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java index 267e67005..5d48dfa6b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java @@ -61,14 +61,16 @@ public class Subsampling1DLayer extends SubsamplingLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling1DLayer ret = - new org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling1DLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling1DLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java index cb643cd7b..d201c88b2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java @@ -113,14 +113,16 @@ public class Subsampling3DLayer extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling3DLayer ret = - new org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling3DLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling3DLayer(lconf, networkDataType); ret.setListeners(iterationListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -338,7 +340,7 @@ public class Subsampling3DLayer extends NoParamLayer { @Setter @NoArgsConstructor protected static abstract class BaseSubsamplingBuilder> - extends Layer.Builder { + extends LayerConfiguration.Builder { protected org.deeplearning4j.nn.conf.layers.PoolingType poolingType = org.deeplearning4j.nn.conf.layers.PoolingType.MAX; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java index f1d546234..32983b01c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java @@ -134,14 +134,16 @@ public class SubsamplingLayer extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int 
layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer ret = - new org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -390,7 +392,7 @@ public class SubsamplingLayer extends NoParamLayer { @Getter @Setter protected static abstract class BaseSubsamplingBuilder> - extends Layer.Builder { + extends LayerConfiguration.Builder { protected org.deeplearning4j.nn.conf.layers.PoolingType poolingType = org.deeplearning4j.nn.conf.layers.PoolingType.MAX; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java index 6a012ed15..6f7a7c091 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java @@ -56,14 +56,17 @@ public class Upsampling1D extends BaseUpsamplingLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling1D ret = - new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling1D(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling1D(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java index bdbbb0c73..61693091a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java @@ -63,14 +63,16 @@ public class Upsampling2D extends BaseUpsamplingLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling2D ret = - new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling2D(conf, networkDataType); + new 
org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling2D(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java index ef5d832b4..f4d5fa280 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java @@ -61,14 +61,18 @@ public class Upsampling3D extends BaseUpsamplingLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling3D ret = - new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling3D(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling3D(lconf, networkDataType); + + ret.setListeners(iterationListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java index 98f6f8077..aa0268be1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java @@ -66,13 +66,15 @@ public class ZeroPadding1DLayer extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.ZeroPadding1DLayer ret = - new org.deeplearning4j.nn.layers.convolution.ZeroPadding1DLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.ZeroPadding1DLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -125,7 +127,7 @@ public class ZeroPadding1DLayer extends NoParamLayer { @Getter @Setter - public static class Builder extends Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { /** * Padding value for left and right. 
Must be length 2 array diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java index f6b97cfcc..21d77ae03 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java @@ -53,13 +53,15 @@ public class ZeroPadding3DLayer extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.ZeroPadding3DLayer ret = - new org.deeplearning4j.nn.layers.convolution.ZeroPadding3DLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.ZeroPadding3DLayer(lconf, networkDataType); ret.setListeners(iterationListeners); ret.setIndex(layerIndex); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -114,7 +116,7 @@ public class ZeroPadding3DLayer extends NoParamLayer { @Getter @Setter - public static class Builder extends Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { /** * [padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW] diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java index 459205609..0d0e85d56 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java @@ -70,13 +70,15 @@ public class ZeroPaddingLayer extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer ret = - new org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -118,7 +120,7 @@ public class ZeroPaddingLayer extends NoParamLayer { @Getter @Setter - public static class Builder extends Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { /** * Padding value for top, bottom, left, and right. 
Must be length 4 array diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java index fd2546019..2124e9eb9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java @@ -25,7 +25,7 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.NoParamLayer; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.convolution.Cropping1DLayer; @@ -76,12 +76,14 @@ public class Cropping1D extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - Cropping1DLayer ret = new Cropping1DLayer(conf, networkDataType); + setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + Cropping1DLayer ret = new Cropping1DLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -113,7 +115,7 @@ public class Cropping1D extends NoParamLayer { @Getter @Setter - public static class Builder extends Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { /** * Cropping amount for top/bottom (in that order). Must be length 1 or 2 array. 
*/ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java index 29aad71bd..604a269cb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java @@ -26,7 +26,7 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.NoParamLayer; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.convolution.Cropping2DLayer; @@ -92,12 +92,14 @@ public class Cropping2D extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - Cropping2DLayer ret = new Cropping2DLayer(conf, networkDataType); + setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + Cropping2DLayer ret = new Cropping2DLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -131,7 +133,7 @@ public class Cropping2D extends NoParamLayer { @Getter @Setter - public static class Builder extends Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { /** * Cropping amount for top/bottom/left/right (in that order). A length 4 array. 
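
Note: the hunks above repeat one mechanical pattern across the no-param layers: instantiate() now looks up this layer's own LayerConfiguration via conf.getFlattenedLayerConfigurations().get(layerIndex), builds the runtime layer from that, initializes parameters against `this`, and calls setLayerConfiguration(lconf) where setConf(conf) was used before. The standalone Java sketch below illustrates only that lookup/ownership shape; every type in it is a hypothetical stand-in, not one of the dl4j classes touched by this patch.

    import java.util.ArrayList;
    import java.util.List;

    // Minimal stand-ins for the real classes; names and shapes are illustrative only.
    class LayerConfiguration {
        final String name;
        LayerConfiguration(String name) { this.name = name; }
    }

    class NeuralNetConfiguration {
        private final List<LayerConfiguration> layers = new ArrayList<>();
        void addLayer(LayerConfiguration l) { layers.add(l); }
        // The refactor relies on a flattened, index-addressable view of all layer configs.
        List<LayerConfiguration> getFlattenedLayerConfigurations() { return layers; }
    }

    class RuntimeLayer {
        private LayerConfiguration layerConfiguration;
        private int index;
        // New style: the runtime layer only ever holds its own configuration.
        void setLayerConfiguration(LayerConfiguration lconf) { this.layerConfiguration = lconf; }
        void setIndex(int i) { this.index = i; }
        @Override public String toString() { return "RuntimeLayer[" + index + "]=" + layerConfiguration.name; }
    }

    public class InstantiatePatternSketch {
        // Mirrors the shape of the patched instantiate(...) methods.
        static RuntimeLayer instantiate(NeuralNetConfiguration conf, int layerIndex) {
            LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
            RuntimeLayer ret = new RuntimeLayer();
            ret.setIndex(layerIndex);
            ret.setLayerConfiguration(lconf); // previously: ret.setConf(conf)
            return ret;
        }

        public static void main(String[] args) {
            NeuralNetConfiguration conf = new NeuralNetConfiguration();
            conf.addLayer(new LayerConfiguration("cropping2d"));
            conf.addLayer(new LayerConfiguration("dense"));
            System.out.println(instantiate(conf, 0));
            System.out.println(instantiate(conf, 1));
        }
    }

The intent, as far as the diff shows, is that a runtime layer keeps a reference to its own per-layer configuration rather than to the whole network configuration.
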
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java index 1ab34b17b..c22c8f429 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java @@ -25,7 +25,7 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.NoParamLayer; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.convolution.Cropping3DLayer; @@ -84,12 +84,14 @@ public class Cropping3D extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - Cropping3DLayer ret = new Cropping3DLayer(conf, networkDataType); + setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + Cropping3DLayer ret = new Cropping3DLayer(lconf, networkDataType); ret.setListeners(iterationListeners); ret.setIndex(layerIndex); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -121,7 +123,7 @@ public class Cropping3D extends NoParamLayer { @Getter @Setter - public static class Builder extends Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { /** * Cropping amount, a length 6 array, i.e. 
crop left depth, crop right depth, crop left height, crop right height, crop left width, crop right width diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java index 9eea40cfc..dc7e9b93d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java @@ -26,6 +26,7 @@ import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.ElementWiseParamInitializer; @@ -58,18 +59,21 @@ public class ElementWiseMultiplicationLayer extends org.deeplearning4j.nn.conf.l @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + setNetConfiguration(conf); if (this.nIn != this.nOut) { throw new IllegalStateException("Element wise layer must have the same input and output size. Got nIn=" + nIn + ", nOut=" + nOut); } + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.feedforward.elementwise.ElementWiseMultiplicationLayer ret = - new org.deeplearning4j.nn.layers.feedforward.elementwise.ElementWiseMultiplicationLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.feedforward.elementwise.ElementWiseMultiplicationLayer(lconf, networkDataType); + ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java index ba5674bbb..35a4cae8d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java @@ -29,7 +29,7 @@ import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.params.FrozenLayerParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; @@ -38,36 +38,32 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; import com.fasterxml.jackson.annotation.JsonProperty; -import 
com.fasterxml.jackson.databind.annotation.JsonDeserialize; import java.util.Collection; import java.util.List; @EqualsAndHashCode(callSuper = false) -public class FrozenLayer extends Layer { +public class FrozenLayer extends LayerConfiguration { - @Getter - protected Layer layer; + /** + * A layer configuration, only if this layer config has been created from another one + */ + @Getter @Setter + private LayerConfiguration innerConfiguration; private FrozenLayer(Builder builder) { super(builder); - this.layer = builder.layer; + this.innerConfiguration = builder.layer; } - public FrozenLayer(@JsonProperty("layer") Layer layer) { - this.layer = layer; - } - - public NeuralNetConfiguration getInnerConf(NeuralNetConfiguration conf) { - NeuralNetConfiguration nnc = conf.clone(); - nnc.setLayer(layer); - return nnc; + public FrozenLayer(@JsonProperty("layer") LayerConfiguration layer) { + this.innerConfiguration = layer; } @Override - public Layer clone() { + public LayerConfiguration clone() { FrozenLayer l = (FrozenLayer) super.clone(); - l.layer = layer.clone(); + l.innerConfiguration = innerConfiguration.clone(); return l; } @@ -77,17 +73,17 @@ public class FrozenLayer extends Layer { boolean initializeParams, DataType networkDataType) { //Need to be able to instantiate a layer, from a config - for JSON -> net type situations - org.deeplearning4j.nn.api.Layer underlying = layer.instantiate(getInnerConf(conf), trainingListeners, + org.deeplearning4j.nn.api.Layer underlying = innerConfiguration.instantiate(getNetConfiguration(), trainingListeners, layerIndex, layerParamsView, initializeParams, networkDataType); - NeuralNetConfiguration nncUnderlying = underlying.conf(); - if (nncUnderlying.variables() != null) { - List vars = nncUnderlying.variables(true); - nncUnderlying.clearVariables(); - conf.clearVariables(); + NeuralNetConfiguration nncUnderlying = underlying.getNetConfiguration(); + if (nncUnderlying.netWideVariables() != null) { + List vars = nncUnderlying.netWideVariables(true); + nncUnderlying.clearNetWideVariable(); + conf.clearNetWideVariable(); for (String s : vars) { - conf.variables(false).add(s); - nncUnderlying.variables(false).add(s); + conf.netWideVariables(false).add(s); + nncUnderlying.netWideVariables(false).add(s); } } @@ -101,17 +97,17 @@ public class FrozenLayer extends Layer { @Override public InputType getOutputType(int layerIndex, InputType inputType) { - return layer.getOutputType(layerIndex, inputType); + return innerConfiguration.getOutputType(layerIndex, inputType); } @Override public void setNIn(InputType inputType, boolean override) { - layer.setNIn(inputType, override); + innerConfiguration.setNIn(inputType, override); } @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return layer.getPreProcessorForInputType(inputType); + return innerConfiguration.getPreProcessorForInputType(inputType); } @Override @@ -131,38 +127,38 @@ public class FrozenLayer extends Layer { @Override public GradientNormalization getGradientNormalization() { - return layer.getGradientNormalization(); + return innerConfiguration.getGradientNormalization(); } @Override public double getGradientNormalizationThreshold() { - return layer.getGradientNormalizationThreshold(); + return innerConfiguration.getGradientNormalizationThreshold(); } @Override public LayerMemoryReport getMemoryReport(InputType inputType) { - return layer.getMemoryReport(inputType); + return innerConfiguration.getMemoryReport(inputType); } @Override public void setLayerName(String 
layerName) { super.setLayerName(layerName); - layer.setLayerName(layerName); + innerConfiguration.setLayerName(layerName); } @Override public void setConstraints(List constraints) { this.constraints = constraints; - this.layer.setConstraints(constraints); + this.innerConfiguration.setConstraints(constraints); } @Getter @Setter - public static class Builder extends Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { - private Layer layer; + private LayerConfiguration layer; - public Builder layer(Layer layer) { + public Builder layer(LayerConfiguration layer) { this.setLayer(layer); return this; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java index 53d7ff914..ae438958f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java @@ -25,7 +25,7 @@ import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; import org.deeplearning4j.nn.params.FrozenLayerWithBackpropParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; @@ -42,18 +42,19 @@ import java.util.List; @EqualsAndHashCode(callSuper = false) public class FrozenLayerWithBackprop extends BaseWrapperLayer { - public FrozenLayerWithBackprop(@JsonProperty("layer") Layer layer) { + public FrozenLayerWithBackprop(@JsonProperty("layer") LayerConfiguration layer) { super(layer); + underlying = layer; } public NeuralNetConfiguration getInnerConf(NeuralNetConfiguration conf) { NeuralNetConfiguration nnc = conf.clone(); - nnc.setLayer(underlying); + nnc.getLayerConfigurations().add(0, underlying); return nnc; } @Override - public Layer clone() { + public LayerConfiguration clone() { FrozenLayerWithBackprop l = (FrozenLayerWithBackprop) super.clone(); l.underlying = underlying.clone(); return l; @@ -65,18 +66,18 @@ public class FrozenLayerWithBackprop extends BaseWrapperLayer { boolean initializeParams, DataType networkDataType) { //Need to be able to instantiate a layer, from a config - for JSON -> net type situations - org.deeplearning4j.nn.api.Layer underlying = getUnderlying().instantiate(getInnerConf(conf), trainingListeners, + org.deeplearning4j.nn.api.Layer underlying = getUnderlying().instantiate(conf, trainingListeners, layerIndex, layerParamsView, initializeParams, networkDataType); - NeuralNetConfiguration nncUnderlying = underlying.conf(); + NeuralNetConfiguration nncUnderlying = underlying.getNetConfiguration(); - if (nncUnderlying.variables() != null) { - List vars = nncUnderlying.variables(true); - nncUnderlying.clearVariables(); - conf.clearVariables(); + if (nncUnderlying.netWideVariables() != null) { + List vars = nncUnderlying.netWideVariables(true); + nncUnderlying.clearNetWideVariable(); + conf.clearNetWideVariable(); for (String s : vars) { - conf.variables(false).add(s); - nncUnderlying.variables(false).add(s); + conf.netWideVariables(false).add(s); + nncUnderlying.netWideVariables(false).add(s); } } diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java index 127502b68..ba85f879c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java @@ -26,6 +26,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.EmptyParamInitializer; @@ -65,13 +66,15 @@ public class RepeatVector extends FeedForwardLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - org.deeplearning4j.nn.layers.RepeatVector ret = new org.deeplearning4j.nn.layers.RepeatVector(conf, networkDataType); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + + org.deeplearning4j.nn.layers.RepeatVector ret = new org.deeplearning4j.nn.layers.RepeatVector(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java index 1229e8cfd..d2d4bec81 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java @@ -31,6 +31,7 @@ import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToCnnPreProcessor; import org.deeplearning4j.nn.params.EmptyParamInitializer; @@ -51,7 +52,7 @@ import java.util.Map; @Data @EqualsAndHashCode(callSuper = false) -public class Yolo2OutputLayer extends org.deeplearning4j.nn.conf.layers.Layer { +public class Yolo2OutputLayer extends LayerConfiguration { private double lambdaCoord; private double lambdaNoObj; @@ -79,14 +80,16 @@ public class Yolo2OutputLayer extends org.deeplearning4j.nn.conf.layers.Layer { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer ret 
= - new org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -156,7 +159,7 @@ public class Yolo2OutputLayer extends org.deeplearning4j.nn.conf.layers.Layer { @Getter @Setter - public static class Builder extends org.deeplearning4j.nn.conf.layers.Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { /** * Loss function coefficient for position and size/scale components of the loss function. Default (as per diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java index 388e131cd..5eda741e4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java @@ -29,7 +29,7 @@ import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.BaseRecurrentLayer; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.recurrent.BidirectionalLayer; @@ -47,13 +47,12 @@ import java.util.List; import java.util.Map; import static org.nd4j.linalg.indexing.NDArrayIndex.interval; -import static org.nd4j.linalg.indexing.NDArrayIndex.point; @NoArgsConstructor @Data @EqualsAndHashCode(callSuper = true, exclude = {"initializer"}) @JsonIgnoreProperties({"initializer"}) -public class Bidirectional extends Layer { +public class Bidirectional extends LayerConfiguration { /** * This Mode enumeration defines how the activations for the forward and backward networks should be combined.
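
Note: FrozenLayer above, and Bidirectional below, are reshaped the same way: the wrapper keeps a LayerConfiguration field (innerConfiguration, or fwd/bwd) and forwards its read-only queries, such as getOutputType, getGradientNormalization and setLayerName, to it. A standalone sketch of that delegation shape follows; the types are hypothetical stand-ins, not the patched dl4j classes.

    // Hypothetical stand-ins; only the delegation shape matches the patch.
    interface LayerConfigurationLike {
        String getLayerName();
        void setLayerName(String name);
        int getOutputSize(int inputSize);
    }

    class DenseConfiguration implements LayerConfigurationLike {
        private String name;
        private final int nOut;
        DenseConfiguration(String name, int nOut) { this.name = name; this.nOut = nOut; }
        public String getLayerName() { return name; }
        public void setLayerName(String name) { this.name = name; }
        public int getOutputSize(int inputSize) { return nOut; }
    }

    // Wrapper configuration: it owns no shape information of its own and forwards
    // every query to the wrapped ("inner") configuration, as FrozenLayer now does.
    class FrozenConfiguration implements LayerConfigurationLike {
        private final LayerConfigurationLike innerConfiguration;
        FrozenConfiguration(LayerConfigurationLike inner) { this.innerConfiguration = inner; }
        public String getLayerName() { return innerConfiguration.getLayerName(); }
        public void setLayerName(String name) { innerConfiguration.setLayerName(name); } // keep names in sync
        public int getOutputSize(int inputSize) { return innerConfiguration.getOutputSize(inputSize); }
    }

    public class FrozenDelegationSketch {
        public static void main(String[] args) {
            LayerConfigurationLike frozen = new FrozenConfiguration(new DenseConfiguration("dense_0", 128));
            System.out.println(frozen.getLayerName() + " -> " + frozen.getOutputSize(64));
        }
    }
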
@@ -68,8 +67,8 @@ public class Bidirectional extends Layer { ADD, MUL, AVERAGE, CONCAT } - private Layer fwd; - private Layer bwd; + private LayerConfiguration fwd; + private LayerConfiguration bwd; private Mode mode; private transient BidirectionalParamInitializer initializer; @@ -82,7 +81,7 @@ public class Bidirectional extends Layer { * * @param layer layer to wrap */ - public Bidirectional(@NonNull Layer layer) { + public Bidirectional(@NonNull LayerConfiguration layer) { this(Mode.CONCAT, layer); } @@ -92,7 +91,7 @@ public class Bidirectional extends Layer { * @param mode Mode to use to combine activations. See {@link Mode} for details * @param layer layer to wrap */ - public Bidirectional(@NonNull Mode mode, @NonNull Layer layer) { + public Bidirectional(@NonNull Mode mode, @NonNull LayerConfiguration layer) { if (!(layer instanceof BaseRecurrentLayer || layer instanceof LastTimeStep || layer instanceof BaseWrapperLayer)) { throw new IllegalArgumentException("Cannot wrap a non-recurrent layer: " @@ -128,6 +127,7 @@ public class Bidirectional extends Layer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); NeuralNetConfiguration c1 = conf.clone(); NeuralNetConfiguration c2 = conf.clone(); c1.setLayer(fwd); @@ -140,10 +140,10 @@ public class Bidirectional extends Layer { org.deeplearning4j.nn.api.Layer b = bwd.instantiate(c2, trainingListeners, layerIndex, bp, initializeParams, networkDataType); - BidirectionalLayer ret = new BidirectionalLayer(conf, f, b, layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + BidirectionalLayer ret = new BidirectionalLayer(lconf, f, b, layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } @@ -238,12 +238,12 @@ public class Bidirectional extends Layer { @AllArgsConstructor @Getter @Setter - public static class Builder extends Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { private Mode mode; - private Layer layer; + private LayerConfiguration layer; - public void setLayer(Layer layer) { + public void setLayer(LayerConfiguration layer) { rnnLayer(layer); } @@ -252,7 +252,7 @@ public class Bidirectional extends Layer { return this; } - public Builder rnnLayer(Layer layer) { + public Builder rnnLayer(LayerConfiguration layer) { if (!(layer instanceof BaseRecurrentLayer || layer instanceof LastTimeStep || layer instanceof BaseWrapperLayer)) { throw new IllegalArgumentException("Cannot wrap a non-recurrent layer: " diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java index ce87b8051..a869999dc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.conf.layers.recurrent; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Layer; +import 
org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; import org.deeplearning4j.nn.layers.recurrent.LastTimeStepLayer; import org.deeplearning4j.optimize.api.TrainingListener; @@ -35,12 +35,12 @@ public class LastTimeStep extends BaseWrapperLayer { private LastTimeStep() {} - public LastTimeStep(Layer underlying) { + public LastTimeStep(LayerConfiguration underlying) { super(underlying); this.layerName = underlying.getLayerName(); // needed for keras import to match names } - public Layer getUnderlying() { + public LayerConfiguration getUnderlying() { return underlying; } @@ -49,8 +49,9 @@ public class LastTimeStep extends BaseWrapperLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); NeuralNetConfiguration conf2 = conf.clone(); - conf2.setLayer(((LastTimeStep) conf2.getLayer()).getUnderlying()); + conf2.setLayer(((LastTimeStep) lconf).getUnderlying()); return new LastTimeStepLayer(underlying.instantiate(conf2, trainingListeners, layerIndex, layerParamsView, initializeParams, networkDataType)); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java index 7cbebeaf2..bda494c1d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java @@ -26,6 +26,7 @@ import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.BaseRecurrentLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.LayerValidation; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.params.SimpleRnnParamInitializer; @@ -55,15 +56,16 @@ public class SimpleRnn extends BaseRecurrentLayer { public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("SimpleRnn", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.recurrent.SimpleRnn ret = - new org.deeplearning4j.nn.layers.recurrent.SimpleRnn(conf, networkDataType); + new org.deeplearning4j.nn.layers.recurrent.SimpleRnn(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java index 54a93b904..7ab6370b7 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java @@ -27,7 +27,7 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; import org.deeplearning4j.nn.layers.recurrent.TimeDistributedLayer; import org.deeplearning4j.optimize.api.TrainingListener; @@ -46,20 +46,22 @@ public class TimeDistributed extends BaseWrapperLayer { /** * @param underlying Underlying (internal) layer - should be a feed forward type such as DenseLayerConfiguration */ - public TimeDistributed(@JsonProperty("underlying") @NonNull Layer underlying, @JsonProperty("rnnDataFormat") RNNFormat rnnDataFormat) { + public TimeDistributed(@JsonProperty("underlying") @NonNull LayerConfiguration underlying, @JsonProperty("rnnDataFormat") RNNFormat rnnDataFormat) { super(underlying); this.rnnDataFormat = rnnDataFormat; } - public TimeDistributed(Layer underlying){ + public TimeDistributed(LayerConfiguration underlying){ super(underlying); } @Override public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + NeuralNetConfiguration conf2 = conf.clone(); - conf2.setLayer(((TimeDistributed) conf2.getLayer()).getUnderlying()); + conf2.setLayer(((TimeDistributed) lconf).getUnderlying()); return new TimeDistributedLayer(underlying.instantiate(conf2, trainingListeners, layerIndex, layerParamsView, initializeParams, networkDataType), rnnDataFormat); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index 71bb2a95a..18c4601c8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -30,7 +30,7 @@ import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.params.SameDiffParamInitializer; import org.deeplearning4j.nn.weights.WeightInit; @@ -54,7 +54,7 @@ import java.util.Map; @Slf4j @Data @EqualsAndHashCode(callSuper = true, doNotUseGetters = true) -public abstract class AbstractSameDiffLayer extends Layer { +public abstract class AbstractSameDiffLayer extends LayerConfiguration { protected List regularization; protected List regularizationBias; @@ -121,7 +121,7 @@ public abstract class AbstractSameDiffLayer extends Layer { } - public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { + public void 
applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { //Default implementation: no op } @@ -187,24 +187,25 @@ public abstract class AbstractSameDiffLayer extends Layer { WeightInitUtil.initWeights(fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array); } - public void applyGlobalConfig(NeuralNetConfiguration.Builder b) { + public void applyGlobalConfig(NeuralNetConfiguration.NeuralNetConfigurationBuilder b) { + NeuralNetConfiguration bConf = b.build(); if (regularization == null || regularization.isEmpty()) { - regularization = b.getRegularization(); + regularization = bConf.getRegularization(); } if (regularizationBias == null || regularizationBias.isEmpty()) { - regularizationBias = b.getRegularizationBias(); + regularizationBias = bConf.getRegularizationBias(); } if (updater == null) { - updater = b.getIUpdater(); + updater = bConf.getIUpdater(); } if (biasUpdater == null) { - biasUpdater = b.getBiasUpdater(); + biasUpdater = bConf.getBiasUpdater(); } if (gradientNormalization == null) { - gradientNormalization = b.getGradientNormalization(); + gradientNormalization = bConf.getGradientNormalization(); } if (Double.isNaN(gradientNormalizationThreshold)) { - gradientNormalizationThreshold = b.getGradientNormalizationThreshold(); + gradientNormalizationThreshold = bConf.getGradientNormalizationThreshold(); } applyGlobalConfigToLayer(b); @@ -234,7 +235,7 @@ public abstract class AbstractSameDiffLayer extends Layer { @Getter @Setter - public static abstract class Builder> extends Layer.Builder { + public static abstract class Builder> extends LayerConfiguration.Builder { protected List regularization = new ArrayList<>(); protected List regularizationBias = new ArrayList<>(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java index ea8fc2b09..cb16d2f26 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java @@ -24,6 +24,7 @@ import lombok.*; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.api.TrainingListener; @@ -85,16 +86,19 @@ public abstract class SameDiffLayer extends AbstractSameDiffLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.samediff.SameDiffLayer ret = - new org.deeplearning4j.nn.layers.samediff.SameDiffLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.samediff.SameDiffLayer(lconf, networkDataType); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } + 
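
Note: applyGlobalConfig() in AbstractSameDiffLayer now takes the NeuralNetConfigurationBuilder, builds a configuration snapshot from it, and copies global defaults only into per-layer fields that are still unset. A standalone sketch of that fill-unset-from-defaults idiom follows; the builder and field names are illustrative, not the real API.

    // Illustrative only: a tiny builder plus a layer config that inherits unset fields from it.
    class GlobalConfig {
        final Double learningRate;
        final String updater;
        GlobalConfig(Double lr, String updater) { this.learningRate = lr; this.updater = updater; }

        static class Builder {
            private Double learningRate = 1e-3;
            private String updater = "adam";
            Builder learningRate(double lr) { this.learningRate = lr; return this; }
            Builder updater(String u) { this.updater = u; return this; }
            GlobalConfig build() { return new GlobalConfig(learningRate, updater); }
        }
    }

    class SameDiffLikeLayerConfig {
        Double learningRate;   // null == not set on the layer, inherit the global value
        String updater;

        // Mirrors the new applyGlobalConfig(...Builder b): build a snapshot of the
        // global defaults, then fill only the fields that are still null.
        void applyGlobalConfig(GlobalConfig.Builder b) {
            GlobalConfig global = b.build();
            if (learningRate == null) learningRate = global.learningRate;
            if (updater == null) updater = global.updater;
        }
    }

    public class ApplyGlobalConfigSketch {
        public static void main(String[] args) {
            SameDiffLikeLayerConfig layer = new SameDiffLikeLayerConfig();
            layer.updater = "sgd"; // explicitly set on the layer, must survive
            layer.applyGlobalConfig(new GlobalConfig.Builder().learningRate(0.01));
            System.out.println(layer.learningRate + " / " + layer.updater); // 0.01 / sgd
        }
    }
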
@SuppressWarnings("unchecked") @Getter @Setter diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java index d781dd244..8fa7fd4d0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers.samediff; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -74,13 +75,15 @@ public abstract class SameDiffOutputLayer extends AbstractSameDiffLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.samediff.SameDiffOutputLayer ret = - new org.deeplearning4j.nn.layers.samediff.SameDiffOutputLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.samediff.SameDiffOutputLayer(lconf, networkDataType); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java index 94a13ffec..cfec8d653 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java @@ -147,30 +147,31 @@ public abstract class SameDiffVertex extends GraphVertex implements TrainingConf } - public void applyGlobalConfig(NeuralNetConfiguration.Builder b) { + public void applyGlobalConfig(NeuralNetConfiguration b_conf) { + if(regularization == null || regularization.isEmpty()){ - regularization = b.getRegularization(); + regularization = b_conf.getRegularization(); } if(regularizationBias == null || regularizationBias.isEmpty()){ - regularizationBias = b.getRegularizationBias(); + regularizationBias = b_conf.getRegularizationBias(); } if (updater == null) { - updater = b.getIUpdater(); + updater = b_conf.getIUpdater(); } if (biasUpdater == null) { - biasUpdater = b.getBiasUpdater(); + biasUpdater = b_conf.getBiasUpdater(); } if (gradientNormalization == null) { - gradientNormalization = b.getGradientNormalization(); + gradientNormalization = b_conf.getGradientNormalization(); } if (Double.isNaN(gradientNormalizationThreshold)) { - gradientNormalizationThreshold = b.getGradientNormalizationThreshold(); + gradientNormalizationThreshold = b_conf.getGradientNormalizationThreshold(); } - applyGlobalConfigToLayer(b); + applyGlobalConfigToLayer(b_conf); } - public void applyGlobalConfigToLayer(NeuralNetConfiguration.Builder globalConfig) { + 
public void applyGlobalConfigToLayer(NeuralNetConfiguration globalConfig) { //Default implementation: no op } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java index 181d32b4c..bd39eb828 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java @@ -25,6 +25,7 @@ import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.NoParamLayer; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.params.EmptyParamInitializer; @@ -43,12 +44,13 @@ public class MaskLayer extends NoParamLayer { public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - org.deeplearning4j.nn.layers.util.MaskLayer ret = new org.deeplearning4j.nn.layers.util.MaskLayer(conf, networkDataType); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.util.MaskLayer ret = new org.deeplearning4j.nn.layers.util.MaskLayer(lconf, networkDataType); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java index 8a3d309a5..7f11874e8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java @@ -24,7 +24,7 @@ import lombok.*; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.optimize.api.TrainingListener; @@ -49,7 +49,7 @@ public class MaskZeroLayer extends BaseWrapperLayer { } - public MaskZeroLayer(@JsonProperty("underlying") Layer underlying, @JsonProperty("maskingValue") double maskingValue) { + public MaskZeroLayer(@JsonProperty("underlying") LayerConfiguration underlying, @JsonProperty("maskingValue") double maskingValue) { this.underlying = underlying; this.maskingValue = maskingValue; } @@ -61,7 +61,7 @@ public class MaskZeroLayer extends BaseWrapperLayer { boolean initializeParams, DataType networkDataType) { NeuralNetConfiguration conf2 = conf.clone(); - conf2.setLayer(((BaseWrapperLayer) conf2.getLayer()).getUnderlying()); + conf2.setLayer(((BaseWrapperLayer) 
this).getUnderlying()); org.deeplearning4j.nn.api.Layer underlyingLayer = underlying.instantiate(conf2, trainingListeners, layerIndex, layerParamsView, initializeParams, networkDataType); @@ -102,12 +102,12 @@ public class MaskZeroLayer extends BaseWrapperLayer { @NoArgsConstructor @Getter @Setter - public static class Builder extends Layer.Builder { + public static class Builder extends LayerConfiguration.Builder { - private Layer underlying; + private LayerConfiguration underlying; private double maskValue; - public Builder setUnderlying(Layer underlying) { + public Builder setUnderlying(LayerConfiguration underlying) { this.underlying = underlying; return this; } @@ -117,7 +117,7 @@ public class MaskZeroLayer extends BaseWrapperLayer { return this; } - public Builder underlying(Layer underlying){ + public Builder underlying(LayerConfiguration underlying){ setUnderlying(underlying); return this; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java index ca1f10bd0..4e6a0c41c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java @@ -26,6 +26,7 @@ import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.BasePretrainNetwork; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.LayerValidation; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; @@ -68,15 +69,16 @@ public class VariationalAutoencoder extends BasePretrainNetwork { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("VariationalAutoencoder", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.variational.VariationalAutoencoder ret = - new org.deeplearning4j.nn.layers.variational.VariationalAutoencoder(conf, networkDataType); + new org.deeplearning4j.nn.layers.variational.VariationalAutoencoder(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); return ret; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayer.java index ca90ee7a1..2495fbd56 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayer.java @@ -22,11 +22,13 @@ package org.deeplearning4j.nn.conf.layers.wrapper; import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.NonNull; import 
org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.params.WrapperLayerParamInitializer; import org.nd4j.linalg.learning.regularization.Regularization; @@ -35,9 +37,24 @@ import java.util.List; @Data @EqualsAndHashCode(callSuper = false) -public abstract class BaseWrapperLayer extends Layer { +public abstract class BaseWrapperLayer extends LayerConfiguration { - protected Layer underlying; + /** + * Set the net configuration for this configuration as well as for the underlying layer + * (if not null there) + * + * @param netConfiguration the neural net configuration + */ + @Override + public void setNetConfiguration(NeuralNetConfiguration netConfiguration) { + super.setNetConfiguration(netConfiguration); + if(getUnderlying().getNetConfiguration() == null) { + getUnderlying().setNetConfiguration( + netConfiguration); //also set netconf for underlying if not set + } + } + + protected LayerConfiguration underlying; protected BaseWrapperLayer(Builder builder) { super(builder); @@ -45,8 +62,9 @@ public abstract class BaseWrapperLayer extends Layer { protected BaseWrapperLayer() {} - public BaseWrapperLayer(Layer underlying) { + public BaseWrapperLayer(LayerConfiguration underlying) { this.underlying = underlying; + this.setNetConfiguration(underlying.getNetConfiguration()); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java index 696d63f5d..8469c6f62 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java @@ -25,6 +25,7 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.LayerValidation; import org.deeplearning4j.nn.layers.ocnn.OCNNParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; @@ -102,15 +103,16 @@ public class OCNNOutputLayer extends BaseOutputLayer { public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("OCNNOutputLayer", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.ocnn.OCNNOutputLayer ret = - new org.deeplearning4j.nn.layers.ocnn.OCNNOutputLayer(conf, networkDataType); + new org.deeplearning4j.nn.layers.ocnn.OCNNOutputLayer(lconf, networkDataType); ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); 
ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(lconf); ret.setActivation(activationFn); if (lastEpochSinceRUpdated == 0 && configureR) { paramTable.get(OCNNParamInitializer.R_KEY).putScalar(0, initialRValue); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java index abd52c0c3..c6a2cbb26 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java @@ -26,11 +26,10 @@ import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.*; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; -import org.nd4j.linalg.activations.impl.*; import org.nd4j.linalg.learning.config.*; import org.nd4j.linalg.learning.regularization.L1Regularization; import org.nd4j.linalg.learning.regularization.Regularization; @@ -38,7 +37,6 @@ import org.nd4j.linalg.learning.regularization.WeightDecay; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.impl.*; import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonDeserializer; import com.fasterxml.jackson.databind.JsonMappingException; @@ -66,8 +64,8 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im public abstract T deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException; - protected boolean requiresIUpdaterFromLegacy(Layer[] layers){ - for(Layer l : layers){ + protected boolean requiresIUpdaterFromLegacy(LayerConfiguration[] layers){ + for(LayerConfiguration l : layers){ if(l instanceof BaseLayer){ BaseLayer bl = (BaseLayer)l; if(bl.getIUpdater() == null && bl.initializer().numParams(bl) > 0){ @@ -78,8 +76,8 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im return false; } - protected boolean requiresDropoutFromLegacy(Layer[] layers){ - for(Layer l : layers){ + protected boolean requiresDropoutFromLegacy(LayerConfiguration[] layers){ + for(LayerConfiguration l : layers){ if(l.getIDropout() != null){ return false; } @@ -87,8 +85,8 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im return true; } - protected boolean requiresRegularizationFromLegacy(Layer[] layers){ - for(Layer l : layers){ + protected boolean requiresRegularizationFromLegacy(LayerConfiguration[] layers){ + for(LayerConfiguration l : layers){ if(l instanceof BaseLayer && ((BaseLayer)l).getRegularization() == null){ return true; } @@ -96,8 +94,8 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im return false; } - protected boolean requiresWeightInitFromLegacy(Layer[] layers){ - for(Layer l : layers){ + protected boolean requiresWeightInitFromLegacy(LayerConfiguration[] layers){ + for(LayerConfiguration l : layers){ if(l instanceof BaseLayer && ((BaseLayer)l).getWeightInitFn() == null){ return true; } @@ 
-105,8 +103,8 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im return false; } - protected boolean requiresActivationFromLegacy(Layer[] layers){ - for(Layer l : layers){ + protected boolean requiresActivationFromLegacy(LayerConfiguration[] layers){ + for(LayerConfiguration l : layers){ if(l instanceof BaseLayer && ((BaseLayer)l).getActivationFn() == null){ return true; } @@ -114,8 +112,8 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im return false; } - protected boolean requiresLegacyLossHandling(Layer[] layers){ - for(Layer l : layers){ + protected boolean requiresLegacyLossHandling(LayerConfiguration[] layers){ + for(LayerConfiguration l : layers){ if(l instanceof BaseOutputLayer && ((BaseOutputLayer)l).getLossFn() == null){ return true; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java index edd9cbef8..cf9282771 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java @@ -29,7 +29,7 @@ import org.deeplearning4j.nn.conf.graph.LayerVertex; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; import org.deeplearning4j.nn.conf.layers.BatchNormalization; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.weightnoise.DropConnect; import org.deeplearning4j.nn.params.BatchNormalizationParamInitializer; import com.fasterxml.jackson.core.JsonLocation; @@ -65,16 +65,16 @@ public class ComputationGraphConfigurationDeserializer //Previously: enumerations and fields. 
Now: classes //Here, we manually create the appropriate Updater instances, if the IUpdater field is empty - List layerList = new ArrayList<>(); + List layerList = new ArrayList<>(); Map vertices = conf.getVertices(); for (Map.Entry entry : vertices.entrySet()) { if (entry.getValue() instanceof LayerVertex) { LayerVertex lv = (LayerVertex) entry.getValue(); - layerList.add(lv.getLayerConf().getLayer()); + layerList.add(lv.getLayerConfiguration()); } } - Layer[] layers = layerList.toArray(new Layer[layerList.size()]); + LayerConfiguration[] layers = layerList.toArray(new LayerConfiguration[layerList.size()]); //Now, check if we need to manually handle IUpdater deserialization from legacy format boolean attemptIUpdaterFromLegacy = requiresIUpdaterFromLegacy(layers); boolean requireLegacyRegularizationHandling = requiresRegularizationFromLegacy(layers); @@ -171,9 +171,9 @@ public class ComputationGraphConfigurationDeserializer // but, as there is no useLogStdev=false property for legacy batchnorm JSON, the 'real' value (useLogStdev=false) // is not set to override the default, unless we do it manually here for(GraphVertex gv : conf.getVertices().values()){ - if(gv instanceof LayerVertex && ((LayerVertex) gv).getLayerConf().getLayer() instanceof BatchNormalization){ - BatchNormalization bn = (BatchNormalization) ((LayerVertex) gv).getLayerConf().getLayer(); - List vars = ((LayerVertex) gv).getLayerConf().getVariables(); + if(gv instanceof LayerVertex && ((LayerVertex) gv).getLayerConfiguration() instanceof BatchNormalization){ + BatchNormalization bn = (BatchNormalization) ((LayerVertex) gv).getLayerConfiguration(); + List vars = ((LayerVertex) gv).getNetConfiguration().getNetWideVariables(); boolean isVariance = vars.contains(BatchNormalizationParamInitializer.GLOBAL_VAR); bn.setUseLogStd(!isVariance); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java index 8097111d6..0b6871524 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.conf.serde; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.serde.legacy.LegacyJsonFormat; import com.fasterxml.jackson.databind.*; import com.fasterxml.jackson.databind.deser.BeanDeserializerModifier; @@ -76,8 +76,8 @@ public class JsonMappers { public JsonDeserializer modifyDeserializer(DeserializationConfig config, BeanDescription beanDesc, JsonDeserializer deserializer) { //Use our custom deserializers to handle backward compatibility for updaters -> IUpdater - if (beanDesc.getBeanClass() == MultiLayerConfiguration.class) { - return new MultiLayerConfigurationDeserializer(deserializer); + if (beanDesc.getBeanClass() == NeuralNetConfiguration.class) { + return new NeuralNetConfigurationDeserializer(deserializer); } else if (beanDesc.getBeanClass() == ComputationGraphConfiguration.class) { return new ComputationGraphConfigurationDeserializer(deserializer); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/MultiLayerConfigurationDeserializer.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java similarity index 89% rename from cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/MultiLayerConfigurationDeserializer.java rename to cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java index 36f4a9b45..17a474e78 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/MultiLayerConfigurationDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java @@ -21,13 +21,12 @@ package org.deeplearning4j.nn.conf.serde; import org.apache.commons.io.IOUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; import org.deeplearning4j.nn.conf.layers.BatchNormalization; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.weightnoise.DropConnect; import org.deeplearning4j.nn.params.BatchNormalizationParamInitializer; import com.fasterxml.jackson.core.JsonLocation; @@ -43,21 +42,19 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; -public class MultiLayerConfigurationDeserializer extends BaseNetConfigDeserializer { +public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserializer { - public MultiLayerConfigurationDeserializer(JsonDeserializer defaultDeserializer) { - super(defaultDeserializer, MultiLayerConfiguration.class); + public NeuralNetConfigurationDeserializer(JsonDeserializer defaultDeserializer) { + super(defaultDeserializer, NeuralNetConfiguration.class); } @Override - public MultiLayerConfiguration deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException { + public NeuralNetConfiguration deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException { long charOffsetStart = jp.getCurrentLocation().getCharOffset(); - MultiLayerConfiguration conf = (MultiLayerConfiguration) defaultDeserializer.deserialize(jp, ctxt); - Layer[] layers = new Layer[conf.getConfs().size()]; - for (int i = 0; i < layers.length; i++) { - layers[i] = conf.getConf(i).getLayer(); - } + NeuralNetConfiguration conf = (NeuralNetConfiguration) defaultDeserializer.deserialize(jp, ctxt); + + LayerConfiguration[] layers = conf.getFlattenedLayerConfigurations().toArray(new LayerConfiguration[0]); //Now, check if we need to manually handle IUpdater deserialization from legacy format boolean attemptIUpdaterFromLegacy = requiresIUpdaterFromLegacy(layers); @@ -162,11 +159,11 @@ public class MultiLayerConfigurationDeserializer extends BaseNetConfigDeserializ //JSON deserialization uses public BatchNormalization() constructor which defaults to log10stdev now // but, as there is no useLogStdev=false property for legacy batchnorm JSON, the 'real' value (useLogStdev=false) // is not set to override the default, unless we do it manually here - for(NeuralNetConfiguration nnc : conf.getConfs()){ - Layer l = nnc.getLayer(); + for(NeuralNetConfiguration nnc : conf.getNetConfigurations()){ + LayerConfiguration l = nnc.getLayerConfigurations().get(0); if(l instanceof BatchNormalization){ BatchNormalization bn = (BatchNormalization)l; - List vars = 
nnc.getVariables(); + List vars = nnc.getNetWideVariables(); boolean isVariance = vars.contains(BatchNormalizationParamInitializer.GLOBAL_VAR); bn.setUseLogStd(!isVariance); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyJsonFormat.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyJsonFormat.java index c654b2698..ceb645be7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyJsonFormat.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyJsonFormat.java @@ -61,7 +61,7 @@ public class LegacyJsonFormat { om.addMixIn(InputPreProcessor.class, InputPreProcessorMixin.class); om.addMixIn(GraphVertex.class, GraphVertexMixin.class); - om.addMixIn(Layer.class, LayerMixin.class); + om.addMixIn(LayerConfiguration.class, LayerMixin.class); om.addMixIn(ReconstructionDistribution.class, ReconstructionDistributionMixin.class); om.addMixIn(IActivation.class, IActivationMixin.class); om.addMixIn(ILossFunction.class, ILossFunctionMixin.class); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java index cabb01843..926d2017d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java @@ -78,7 +78,7 @@ public class DropConnect implements IWeightNoise { @Override public INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train, LayerWorkspaceMgr workspaceMgr) { - ParamInitializer init = layer.conf().getLayer().initializer(); + ParamInitializer init = layer.getLayerConfiguration().initializer(); INDArray param = layer.getParam(paramKey); double p; @@ -88,8 +88,8 @@ public class DropConnect implements IWeightNoise { p = weightRetainProbSchedule.valueAt(iteration, epoch); } - if (train && init.isWeightParam(layer.conf().getLayer(), paramKey) - || (applyToBiases && init.isBiasParam(layer.conf().getLayer(), paramKey))) { + if (train && init.isWeightParam(layer.getLayerConfiguration(), paramKey) + || (applyToBiases && init.isBiasParam(layer.getLayerConfiguration(), paramKey))) { INDArray out = workspaceMgr.createUninitialized(ArrayType.INPUT, param.dataType(), param.shape(), param.ordering()); Nd4j.getExecutioner().exec(new DropOut(param, out, p)); return out; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java index 0e789749b..fdf01ad66 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java @@ -71,10 +71,10 @@ public class WeightNoise implements IWeightNoise { @Override public INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train, LayerWorkspaceMgr workspaceMgr) { - ParamInitializer init = layer.conf().getLayer().initializer(); + ParamInitializer init = layer.getLayerConfiguration().initializer(); INDArray param = layer.getParam(paramKey); - if (train && init.isWeightParam(layer.conf().getLayer(), paramKey) || - (applyToBias && init.isBiasParam(layer.conf().getLayer(), paramKey))) { + if (train 
&& init.isWeightParam(layer.getLayerConfiguration(), paramKey) || + (applyToBias && init.isBiasParam(layer.getLayerConfiguration(), paramKey))) { org.nd4j.linalg.api.rng.distribution.Distribution dist = Distributions.createDistribution(distribution); INDArray noise = dist.sample(param.ulike()); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java index 4a080bb28..34d9b8c50 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java @@ -25,12 +25,13 @@ import lombok.NonNull; import lombok.Setter; import lombok.extern.slf4j.Slf4j; import lombok.val; -import net.brutex.ai.dnn.api.INeuralNetwork; +import net.brutex.ai.dnn.api.IModel; import net.brutex.ai.dnn.networks.ArtificialNeuralNetwork; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.bytedeco.javacpp.Pointer; import org.deeplearning4j.exception.DL4JInvalidConfigException; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.util.*; import org.nd4j.adapters.OutputAdapter; import org.nd4j.linalg.dataset.AsyncMultiDataSetIterator; @@ -105,8 +106,7 @@ import java.util.*; import java.util.concurrent.atomic.AtomicLong; @Slf4j -public class ComputationGraph extends ArtificialNeuralNetwork implements Serializable, Model, - INeuralNetwork { +public class ComputationGraph extends ArtificialNeuralNetwork implements Serializable { /** * This method returns configuration of this ComputationGraph @@ -220,6 +220,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali public ComputationGraph(ComputationGraphConfiguration computationGraphConfiguration) { + super(computationGraphConfiguration.getDefaultConfiguration()); this.computationGraphConfiguration = computationGraphConfiguration; this.numInputArrays = computationGraphConfiguration.getNetworkInputs().size(); this.numOutputArrays = computationGraphConfiguration.getNetworkOutputs().size(); @@ -543,7 +544,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali //Set RNG seed, for repeatability between initializations when set if (initializeParams) { - Nd4j.getRandom().setSeed(conf().getSeed()); + Nd4j.getRandom().setSeed(getNetConfiguration().getSeed()); } //Given the topological ordering: work out the subset of the parameters array used for each layer @@ -564,8 +565,8 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali int numLayers = 0; List tempLayerList = new ArrayList<>(); - defaultConfiguration.clearVariables(); - List variables = defaultConfiguration.variables(false); + defaultConfiguration.clearNetWideVariable(); + List variables = defaultConfiguration.netWideVariables(false); i = computationGraphConfiguration.getNetworkInputs().size(); for(; i layerVariables = l.conf().variables(); + List layerVariables = l.getNetConfiguration().netWideVariables(); if (layerVariables != null) { for (String s : layerVariables) { variables.add(gv.getVertexName() + "_" + s); @@ -689,7 +690,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali // now we init solver & optimizer if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new 
Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build(); + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this).build(); solver.initOptimizer(); } } @@ -710,7 +711,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali } for(Layer l : layers){ - String layerName = l.conf().getLayer().getLayerName(); + String layerName = l.getLayerConfiguration().getLayerName(); List inputs = computationGraphConfiguration.getVertexInputs().get(layerName); String in = inputs.get(0); //For now: layers should have exactly 1 input @@ -1158,7 +1159,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali } else { if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build(); + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this).build(); } } @@ -2381,8 +2382,8 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali //Standard feed-forward case if(i > 0 && current.hasLayer() && prev.hasLayer() && - ConvolutionUtils.layerHasConvolutionLayout(prev.getLayer().conf().getLayer()) - && ConvolutionUtils.layerHasConvolutionLayout(current.getLayer().conf().getLayer())) { + ConvolutionUtils.layerHasConvolutionLayout(prev.getLayer().getLayerConfiguration()) + && ConvolutionUtils.layerHasConvolutionLayout(current.getLayer().getLayerConfiguration())) { /** * Not QUITE the proper fix, but getting close. @@ -2390,8 +2391,8 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali * Need to play with output sizes a bit to make sure we put the right parameters in there to get * correct behavior. 
*/ - CNN2DFormat preLayerFormat = ConvolutionUtils.getFormatForLayer(prev.getLayer().conf().getLayer()); - CNN2DFormat currLayerFormat = ConvolutionUtils.getFormatForLayer(current.getLayer().conf().getLayer()); + CNN2DFormat preLayerFormat = ConvolutionUtils.getFormatForLayer(prev.getLayer().getLayerConfiguration()); + CNN2DFormat currLayerFormat = ConvolutionUtils.getFormatForLayer(current.getLayer().getLayerConfiguration()); if(preLayerFormat != currLayerFormat) { int inputIdx = -1; for(int inputVertex = 0; inputVertex < current.getInputVertices().length; inputVertex++) { @@ -2417,10 +2418,10 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali else out = current.doForward(train, workspaceMgr); } else if(i > 0 && current.hasLayer() && prev.hasLayer() && - Convolution1DUtils.hasRnnDataFormat(prev.getLayer().conf().getLayer()) - && Convolution1DUtils.hasRnnDataFormat(current.getLayer().conf().getLayer())) { - RNNFormat preLayerFormat = Convolution1DUtils.getRnnFormatFromLayer(prev.getLayer().conf().getLayer()); - RNNFormat currLayerFormat = Convolution1DUtils.getRnnFormatFromLayer(current.getLayer().conf().getLayer()); + Convolution1DUtils.hasRnnDataFormat(prev.getLayer().getLayerConfiguration()) + && Convolution1DUtils.hasRnnDataFormat(current.getLayer().getLayerConfiguration())) { + RNNFormat preLayerFormat = Convolution1DUtils.getRnnFormatFromLayer(prev.getLayer().getLayerConfiguration()); + RNNFormat currLayerFormat = Convolution1DUtils.getRnnFormatFromLayer(current.getLayer().getLayerConfiguration()); int inputIdx = -1; for(int inputVertex = 0; inputVertex < current.getInputVertices().length; inputVertex++) { if(current.getInputVertices()[inputVertex].getVertexIndex() == prev.getVertexIndex()) { @@ -2923,7 +2924,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali init(); for (Layer l : layers) { - l.setListeners(listeners); + l.setListeners(listeners.toArray(new TrainingListener[]{})); } if (solver != null) { @@ -2936,6 +2937,28 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali } } + /** + * The param table + * + * @return + */ + + public Map getParamTable() { + return null; + } + + /** + * Table of parameters by key, for backprop. For many models (dense layers, etc) - all parameters + * are backprop parameters + * + * @param backpropParamsOnly If true, return backprop params only. 
If false: return all params + * (equivalent to paramsTable()) + */ + + public Map getParamTable(boolean backpropParamsOnly) { + return null; + } + /** * Set the trainingListeners for the ComputationGraph (and all layers in the network) */ @@ -2994,7 +3017,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali */ public ComputationGraphUpdater getUpdater(boolean initializeIfAbsent){ if (solver == null && initializeIfAbsent) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build(); + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this).build(); solver.getOptimizer().setUpdaterComputationGraph(new ComputationGraphUpdater(this)); } if(solver != null) { @@ -3008,7 +3031,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali */ public void setUpdater(ComputationGraphUpdater updater) { if (solver == null) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build(); + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this).build(); } solver.getOptimizer().setUpdaterComputationGraph(updater); } @@ -3399,14 +3422,10 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali } @Override - public NeuralNetConfiguration conf() { + public NeuralNetConfiguration getNetConfiguration() { return defaultConfiguration; } - @Override - public void setConf(NeuralNetConfiguration conf) { - throw new UnsupportedOperationException(); - } @Override public INDArray input() { @@ -3434,16 +3453,11 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali } - @Override - public Map paramTable() { - return paramTable(false); - } - public Map paramTable(boolean backpropParamsOnly) { //Get all parameters from all layers/vertices Map allParams = new LinkedHashMap<>(); for(GraphVertex gv : vertices){ - Map paramMap = gv.paramTable(backpropParamsOnly); + Map paramMap = gv.getParamTable(backpropParamsOnly); for (Map.Entry entry : paramMap.entrySet()) { String newKey = gv.getVertexName() + "_" + entry.getKey(); allParams.put(newKey, entry.getValue()); @@ -3452,11 +3466,11 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali return allParams; } - @Override + public void setParamTable(@NonNull Map paramTable) { - Map m = paramTable(); + Map m = getParamTable(); Preconditions.checkArgument(paramTable.keySet().equals(m.keySet()), "Cannot set param table: parameter set keys are not equal"); - Map current = paramTable(); + Map current = getParamTable(); //Check shapes before doing partial assigment to avoid leaving net in incorrect state for(String s : current.keySet()){ INDArray arrCurrent = current.get(s); @@ -3580,7 +3594,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali * @return Hidden state, or null if layer is not an RNN layer */ public Map rnnGetPreviousState(int layer) { - return rnnGetPreviousState(layers[layer].conf().getLayer().getLayerName()); + return rnnGetPreviousState(layers[layer].getLayerConfiguration().getLayerName()); } /** @@ -3613,7 +3627,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali l = ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer)l).getUnderlying(); } if (l instanceof RecurrentLayer) { - states.put(l.conf().getLayer().getLayerName(), ((RecurrentLayer) l).rnnGetPreviousState()); + 
states.put(l.getLayerConfiguration().getLayerName(), ((RecurrentLayer) l).rnnGetPreviousState()); } } return states; @@ -3626,7 +3640,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali * @param state The state to set the specified layer to */ public void rnnSetPreviousState(int layer, Map state) { - rnnSetPreviousState(layers[layer].conf().getLayer().getLayerName(), state); + rnnSetPreviousState(layers[layer].getLayerConfiguration().getLayerName(), state); } /** @@ -3729,7 +3743,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) .build(); } } @@ -3975,7 +3989,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali Layer outputLayer = getOutputLayer(0); if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), Evaluation.class); + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), Evaluation.class); } return (T)doEvaluation(iterator, new org.deeplearning4j.eval.Evaluation(labelsList, topN))[0]; @@ -3993,7 +4007,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali public T evaluate(MultiDataSetIterator iterator, List labelsList, int topN) { Layer outputLayer = getOutputLayer(0); if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), Evaluation.class); + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), Evaluation.class); } return (T)doEvaluation(iterator, new org.deeplearning4j.eval.Evaluation(labelsList, topN))[0]; } @@ -4058,7 +4072,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali public T evaluateROC(DataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(0); if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), ROC.class); + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), ROC.class); } return (T)doEvaluation(iterator, new org.deeplearning4j.eval.ROC(rocThresholdSteps))[0]; } @@ -4081,7 +4095,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali public T evaluateROC(MultiDataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(0); if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), ROC.class); + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), ROC.class); } return (T)doEvaluation(iterator, new org.deeplearning4j.eval.ROC(rocThresholdSteps))[0]; } @@ -4104,7 +4118,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali public T evaluateROCMultiClass(DataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(0); 
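+        // Output-layer validation below reads the layer's LayerConfiguration directly
+        // (outputLayer.getLayerConfiguration()) instead of going through conf().getLayer().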
if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), ROCMultiClass.class); + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), ROCMultiClass.class); } return (T)doEvaluation(iterator, new org.deeplearning4j.eval.ROCMultiClass(rocThresholdSteps))[0]; } @@ -4119,7 +4133,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali public T evaluateROCMultiClass(MultiDataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(0); if(this.getComputationGraphConfiguration().isValidateOutputLayerConfig()){ - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), ROCMultiClass.class); + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), ROCMultiClass.class); } return (T)doEvaluation(iterator, new org.deeplearning4j.eval.ROCMultiClass(rocThresholdSteps))[0]; } @@ -4396,19 +4410,19 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali paramShape = ""; if (currentLayer instanceof BidirectionalLayer) { // Bidirectional layer is not an FFL BidirectionalLayer bi = (BidirectionalLayer) currentLayer; - in = String.valueOf(((Bidirectional)bi.conf().getLayer()).getNIn()); - out = String.valueOf(((Bidirectional)bi.conf().getLayer()).getNOut()); + in = String.valueOf(((Bidirectional)bi.getLayerConfiguration()).getNIn()); + out = String.valueOf(((Bidirectional)bi.getLayerConfiguration()).getNOut()); } else { try { - in = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNIn()); - out = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNOut()); + in = String.valueOf(((FeedForwardLayer) currentLayer.getLayerConfiguration()).getNIn()); + out = String.valueOf(((FeedForwardLayer) currentLayer.getLayerConfiguration()).getNOut()); } catch (Exception e) { // Some layers, like PReLU, are just BaseLayers (but have parameters) } } - List paraNames = currentLayer.conf().variables(); + List paraNames = currentLayer.getNetConfiguration().netWideVariables(); for (String aP : paraNames) { - String paramS = ArrayUtils.toString(currentLayer.paramTable().get(aP).shape()); + String paramS = ArrayUtils.toString(currentLayer.getParamTable().get(aP).shape()); paramShape += aP + ":" + paramS + ", "; } paramShape = paramShape.subSequence(0, paramShape.lastIndexOf(",")).toString(); @@ -4738,7 +4752,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali throw new IllegalArgumentException("Invalid layer index: " + layer + ". ILayer index must be between 0 and " + (layers.length - 1) + " inclusive"); } - return layerSize(layers[layer].conf().getLayer().getLayerName()); + return layerSize(layers[layer].getLayerConfiguration().getLayerName()); } /** @@ -4757,7 +4771,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali throw new IllegalArgumentException("Invalid layer index: " + layer + ". 
ILayer index must be between 0 and " + (layers.length - 1) + " inclusive"); } - return layerInputSize(layers[layer].conf().getLayer().getLayerName()); + return layerInputSize(layers[layer].getLayerConfiguration().getLayerName()); } /** @@ -4775,7 +4789,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali if(l == null){ throw new IllegalArgumentException("No layer with name \"" + layerName + "\" exists"); } - org.deeplearning4j.nn.conf.layers.Layer conf = l.conf().getLayer(); + LayerConfiguration conf = l.getLayerConfiguration(); if (conf == null || !(conf instanceof FeedForwardLayer)) { return 0; } @@ -4800,7 +4814,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali if(l == null){ throw new IllegalArgumentException("No layer with name \"" + layerName + "\" exists"); } - org.deeplearning4j.nn.conf.layers.Layer conf = l.conf().getLayer(); + LayerConfiguration conf = l.getLayerConfiguration(); if (conf == null || !(conf instanceof FeedForwardLayer)) { return 0; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java index cdb124d75..759f214bc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java @@ -38,6 +38,16 @@ public abstract class BaseGraphVertex implements GraphVertex { protected ComputationGraph graph; + public BaseGraphVertex(){}; + @Override + public Map getParamTable() { + return null; + } + + public void setParamTable(Map params) { + throw new RuntimeException("Not implemented."); + } + protected String vertexName; /** The index of this vertex */ @@ -197,7 +207,7 @@ public abstract class BaseGraphVertex implements GraphVertex { } @Override - public Map paramTable(boolean backpropOnly) { + public Map getParamTable(boolean backpropOnly) { return Collections.emptyMap(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java index 949ee0f7e..0d2a3a26d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java @@ -179,8 +179,8 @@ public abstract class BaseWrapperVertex implements GraphVertex { } @Override - public Map paramTable(boolean backpropOnly) { - return underlying.paramTable(backpropOnly); + public Map getParamTable(boolean backpropOnly) { + return underlying.getParamTable(backpropOnly); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java index 61136e0db..96ac34c19 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java @@ -156,5 +156,5 @@ public interface GraphVertex extends Trainable, Serializable { * @param backpropOnly If true: exclude unsupervised training parameters * @return Parameter table */ - Map paramTable(boolean backpropOnly); + Map getParamTable(boolean backpropOnly); } diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java index 77107c6ee..c0b5999ac 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.graph.vertex.impl; +import java.util.Map; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.TrainingConfig; @@ -46,4 +47,24 @@ public class FrozenVertex extends BaseWrapperVertex { } return config; } + + /** + * The param table + * + * @return + */ + @Override + public Map getParamTable() { + return null; + } + + /** + * Setter for the param table + * + * @param paramTable + */ + @Override + public void setParamTable(Map paramTable) { + + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java index 60f3dad0b..5f9ebdb35 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java @@ -85,12 +85,12 @@ public class LayerVertex extends BaseGraphVertex { return; this.layer = new FrozenLayer(this.layer); - this.layer.conf().getLayer().setLayerName(vertexName); + this.layer.getLayerConfiguration().setLayerName(vertexName); } @Override - public Map paramTable(boolean backpropOnly) { - return layer.paramTable(backpropOnly); + public Map getParamTable(boolean backpropOnly) { + return layer.getParamTable(backpropOnly); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java index 5c4c8ee16..edaa3fb80 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java @@ -20,9 +20,19 @@ package org.deeplearning4j.nn.layers; +import java.lang.ref.Cleaner; +import java.lang.ref.PhantomReference; +import java.lang.ref.Reference; +import java.lang.ref.WeakReference; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; import lombok.AccessLevel; import lombok.Data; +import lombok.Getter; import lombok.NoArgsConstructor; +import lombok.NonNull; import lombok.Setter; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; @@ -30,411 +40,784 @@ import org.deeplearning4j.nn.api.TrainingConfig; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.common.primitives.Pair; +import org.nd4j.evaluation.IEvaluation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import 
org.nd4j.common.primitives.Pair; - -import java.util.*; +import org.nd4j.linalg.dataset.api.DataSet; +import org.nd4j.linalg.dataset.api.MultiDataSet; +import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; /** * A layer with input and output, no parameters or gradients */ @Data @NoArgsConstructor -public abstract class AbstractLayer implements Layer { +public abstract class AbstractLayer implements Layer { - @Setter(AccessLevel.NONE) - protected INDArray input; - protected INDArray preOutput; - protected NeuralNetConfiguration conf; - protected boolean dropoutApplied = false; - protected Collection trainingListeners = new ArrayList<>(); - protected int index = 0; - protected INDArray maskArray; - protected MaskState maskState; - protected CacheMode cacheMode = CacheMode.NONE; - protected boolean inputModificationAllowed = false; - protected DataType dataType; + @Setter(AccessLevel.NONE) + protected INDArray input; + protected INDArray preOutput; + @Getter + @NonNull + protected LayerConf_T layerConfiguration; + protected boolean dropoutApplied = false; + @Getter @Setter @NonNull + protected Collection trainingListeners = new ArrayList<>(); + @Deprecated public Collection getListeners() {return getTrainingListeners();} + @Deprecated public void setListeners(TrainingListener ... listeners) { setTrainingListeners(List.of(listeners));} + /** + * Set the {@link TrainingListener}s for this model. If any listeners have previously been set, + * they will be replaced by this method + * + * @param listeners + */ + @Deprecated + public void setListeners(Collection listeners) { + setTrainingListeners(listeners); + } - protected int iterationCount; - protected int epochCount; - public AbstractLayer(NeuralNetConfiguration conf, DataType dataType) { - this.conf = conf; - if (conf != null) - cacheMode = conf.getCacheMode(); - this.dataType = dataType; + protected int index = 0; + protected INDArray maskArray; + protected MaskState maskState; + protected CacheMode cacheMode = CacheMode.NONE; + protected boolean inputModificationAllowed = false; + protected DataType dataType; + protected int iterationCount; + protected int epochCount; + private List variables = new ArrayList<>(); + public AbstractLayer(LayerConfiguration layerConfiguration, DataType dataType) { + this.layerConfiguration = (LayerConf_T) layerConfiguration; + if (layerConfiguration != null) { + cacheMode = layerConfiguration.getNetConfiguration().getCacheMode(); + } + this.dataType = dataType; + } + + /** + * @param backpropOnly If true: return only parameters that are not exclusively used for layerwise + * pretraining + * @return Parameter table + */ + @Override + public Map getParamTable(boolean backpropOnly) { + return null; + } + + public void setParamTable(Map map) { + throw new RuntimeException("Not implemented."); + } + /** + * @return 1D gradients view array + */ + @Override + public INDArray getGradientsViewArray() { + return null; + } + + /** + * Creates and returns a copy of this object. The precise meaning of "copy" may depend on the + * class of the object. The general intent is that, for any object {@code x}, the expression: + *
+   * <pre>
+   * x.clone() != x</pre>
+   * will be true, and that the expression:
+   * <pre>
+   * x.clone().getClass() == x.getClass()</pre>
+   * will be {@code true}, but these are not absolute requirements. While it is typically the case
+   * that:
+   * <pre>
+   * x.clone().equals(x)</pre>
+   * will be {@code true}, this is not an absolute requirement.

+ * By convention, the returned object should be obtained by calling {@code super.clone}. If a + * class and all of its superclasses (except {@code Object}) obey this convention, it will be the + * case that {@code x.clone().getClass() == x.getClass()}. + *

+ * By convention, the object returned by this method should be independent of this object (which + * is being cloned). To achieve this independence, it may be necessary to modify one or more + * fields of the object returned by {@code super.clone} before returning it. Typically, this + * means copying any mutable objects that comprise the internal "deep structure" of the object + * being cloned and replacing the references to these objects with references to the copies. If a + * class contains only primitive fields or references to immutable objects, then it is usually the + * case that no fields in the object returned by {@code super.clone} need to be modified. + *

+ * The method {@code clone} for class {@code Object} performs a specific cloning operation. First, + * if the class of this object does not implement the interface {@code Cloneable}, then a + * {@code CloneNotSupportedException} is thrown. Note that all arrays are considered to implement + * the interface {@code Cloneable} and that the return type of the {@code clone} method of an + * array type {@code T[]} is {@code T[]} where T is any reference or primitive type. Otherwise, + * this method creates a new instance of the class of this object and initializes all its fields + * with exactly the contents of the corresponding fields of this object, as if by assignment; the + * contents of the fields are not themselves cloned. Thus, this method performs a "shallow copy" + * of this object, not a "deep copy" operation. + *

+ * The class {@code Object} does not itself implement the interface {@code Cloneable}, so calling + * the {@code clone} method on an object whose class is {@code Object} will result in throwing an + * exception at run time. + * + * @return a clone of this instance. + * @throws CloneNotSupportedException if the object's class does not support the {@code Cloneable} + * interface. Subclasses that override the {@code clone} method + * can also throw this exception to indicate that an instance + * cannot be cloned. + * @see Cloneable + */ + @Override + protected Object clone() throws CloneNotSupportedException { + return super.clone(); + } + + /** + * Called by the garbage collector on an object when garbage collection determines that there are + * no more references to the object. A subclass overrides the {@code finalize} method to dispose + * of system resources or to perform other cleanup. + *

+ * The general contract of {@code finalize} is that it is invoked if and when the Java™ + * virtual machine has determined that there is no longer any means by which this object can be + * accessed by any thread that has not yet died, except as a result of an action taken by the + * finalization of some other object or class which is ready to be finalized. The {@code finalize} + * method may take any action, including making this object available again to other threads; the + * usual purpose of {@code finalize}, however, is to perform cleanup actions before the object is + * irrevocably discarded. For example, the finalize method for an object that represents an + * input/output connection might perform explicit I/O transactions to break the connection before + * the object is permanently discarded. + *

+ * The {@code finalize} method of class {@code Object} performs no special action; it simply + * returns normally. Subclasses of {@code Object} may override this definition. + *

+ * The Java programming language does not guarantee which thread will invoke the {@code finalize} + * method for any given object. It is guaranteed, however, that the thread that invokes finalize + * will not be holding any user-visible synchronization locks when finalize is invoked. If an + * uncaught exception is thrown by the finalize method, the exception is ignored and finalization + * of that object terminates. + *

+ * After the {@code finalize} method has been invoked for an object, no further action is taken + * until the Java virtual machine has again determined that there is no longer any means by which + * this object can be accessed by any thread that has not yet died, including possible actions by + * other objects or classes which are ready to be finalized, at which point the object may be + * discarded. + *

+ * The {@code finalize} method is never invoked more than once by a Java virtual machine for any + * given object. + *

+ * Any exception thrown by the {@code finalize} method causes the finalization of this object to + * be halted, but is otherwise ignored. + * + * @throws Throwable the {@code Exception} raised by this method + * @apiNote Classes that embed non-heap resources have many options for cleanup of those + * resources. The class must ensure that the lifetime of each instance is longer than that of any + * resource it embeds. {@link Reference#reachabilityFence} can be used to ensure that objects + * remain reachable while resources embedded in the object are in use. + *

+ * A subclass should avoid overriding the {@code finalize} method unless the subclass embeds + * non-heap resources that must be cleaned up before the instance is collected. Finalizer + * invocations are not automatically chained, unlike constructors. If a subclass overrides + * {@code finalize} it must invoke the superclass finalizer explicitly. To guard against + * exceptions prematurely terminating the finalize chain, the subclass should use a + * {@code try-finally} block to ensure {@code super.finalize()} is always invoked. For example, + *

+   * <pre>{@code
+   *     @Override
+   *     protected void finalize() throws Throwable {
+   *         try {
+   *             ... // cleanup subclass state
+   *         } finally {
+   *             super.finalize();
+   *         }
+   *     }
+   * }</pre>
+ * @jls 12.6 Finalization of Class Instances + * @see WeakReference + * @see PhantomReference + * @deprecated The finalization mechanism is inherently problematic. Finalization can lead to + * performance issues, deadlocks, and hangs. Errors in finalizers can lead to resource leaks; + * there is no way to cancel finalization if it is no longer necessary; and no ordering is + * specified among calls to {@code finalize} methods of different objects. Furthermore, there are + * no guarantees regarding the timing of finalization. The {@code finalize} method might be called + * on a finalizable object only after an indefinite delay, if at all. + *

+ * Classes whose instances hold non-heap resources should provide a method to enable explicit + * release of those resources, and they should also implement {@link AutoCloseable} if + * appropriate. The {@link Cleaner} and {@link PhantomReference} provide more flexible and + * efficient ways to release resources when an object becomes unreachable. + */ + @Override + protected void finalize() throws Throwable { + super.finalize(); + } + + /** + * This method returns updater state (if applicable), null otherwise + * + * @return + */ + @Override + public INDArray updaterState() { + return null; + } + + /** + * This method returns Optimizer used for training + * + * @return + */ + @Override + public ConvexOptimizer getOptimizer() { + return null; + } + + /** + * This method fits model with a given DataSet + * + * @param dataSet + */ + @Override + public void fit(DataSet dataSet) { + + } + + /** + * This method fits model with a given MultiDataSet + * + * @param dataSet + */ + @Override + public void fit(MultiDataSet dataSet) { + + } + + /** + * This method fits model with a given DataSetIterator + * + * @param iterator + */ + @Override + public void fit(DataSetIterator iterator) { + + } + + /** + * This method fits model with a given MultiDataSetIterator + * + * @param iterator + */ + @Override + public void fit(MultiDataSetIterator iterator) { + + } + + /** + * This method executes evaluation of the model against given iterator and evaluation + * implementations + * + * @param iterator + * @param evaluations + */ + @Override + public T[] doEvaluation(DataSetIterator iterator, T... evaluations) { + return null; + } + + /** + * This method executes evaluation of the model against given iterator and evaluation + * implementations + * + * @param iterator + * @param evaluations + */ + @Override + public T[] doEvaluation(MultiDataSetIterator iterator, T... evaluations) { + return null; + } + + /** + * @param netConfiguration + */ + @Override + public void setNetConfiguration(@NonNull NeuralNetConfiguration netConfiguration) { + + } + + /** + * Init the model + */ + @Override + public void init() { + + } + + /** + * This method ADDS additional TrainingListener to existing listeners + * + * @param listener + */ + @Override + public void addListeners(TrainingListener... listener) { + this.trainingListeners.addAll(List.of(listener)); + } + + /** + * Update layer weights and biases with gradient change + * + * @param gradient + */ + @Override + public void update(Gradient gradient) { + + } + + /** + * Perform one update applying the gradient + * + * @param gradient the gradient to apply + * @param paramType + */ + @Override + public void update(INDArray gradient, String paramType) { + + } + + /** + * Update the score + * + * @param workspaceMgr + */ + @Override + public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) { + + } + + /** + * the number of parameters for the model + * + * @param backwards + * @return the number of parameters for the model + */ + @Override + public long numParams(boolean backwards) { + return 0; + } + + /** + * Set the parameters for this model. 
This expects a linear ndarray which then be unpacked + * internally relative to the expected ordering of the model + * + * @param params the parameters for the model + */ + @Override + public void setParams(INDArray params) { + + } + + /** + * Set the initial parameters array as a view of the full (backprop) network parameters NOTE: this + * is intended to be used internally in MultiLayerNetwork and ComputationGraph, not by users. + * + * @param params a 1 x nParams row vector that is a view of the larger (MLN/CG) parameters array + */ + @Override + public void setParamsViewArray(INDArray params) { + + } + + /** + * Set the gradients array as a view of the full (backprop) network parameters NOTE: this is + * intended to be used internally in MultiLayerNetwork and ComputationGraph, not by users. + * + * @param gradients a 1 x nParams row vector that is a view of the larger (MLN/CG) gradients + * array + */ + @Override + public void setBackpropGradientsViewArray(INDArray gradients) { + + } + + /** + * The current inputs batch size + * + * @return the current inputs batch size + */ + @Override + public int batchSize() { + return 0; + } + + /** + * The input/feature matrix for the model + * + * @return the input/feature matrix for the model + */ + @Override + public INDArray input() { + return null; + } + + /** + * Get a parameter array for a given parameter type key + * + * @param param the key of the parameter + * @return ndarray of parameters + */ + @Override + public INDArray getParam(String param) { + return null; + } + + + /** + * The param table + * + * @return + */ + @Override + public Map getParamTable() { + return null; + } + + /** + * Set the parameters for a given parameter type. + * + * @param key the param type key to set + * @param val the new parameters ndarray + */ + @Override + public void setParam(String key, INDArray val) { + + } + + /** + * + */ + @Override + public void close() { + + } + + /** + * Calculate the gradient relative to the error in the next layer + * + * @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where + * C is cost function a=sigma(z) is activation. + * @param workspaceMgr Workspace manager + * @return Pair where Gradient is gradient for this layer, INDArray is epsilon + * (activation gradient) needed by next layer, but before element-wise multiply by sigmaPrime(z). + * So for standard feed-forward layer, if this layer is L, then return.getSecond() == dL/dIn = + * (w^(L)*(delta^(L))^T)^T. Note that the returned array should be placed in the + * {@link ArrayType#ACTIVATION_GRAD} workspace via the workspace manager + */ + @Override + public Pair backpropGradient(INDArray epsilon, + LayerWorkspaceMgr workspaceMgr) { + return null; + } + + /** + * Perform forward pass and return the activations array with the last set input + * + * @param training training or test mode + * @param workspaceMgr Workspace manager + * @return the activation (layer output) of the last specified input. 
Note that the returned array + * should be placed in the {@link ArrayType#ACTIVATIONS} workspace via the workspace manager + */ + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + return null; + } + + /** + * Returns true if the layer can be trained in an unsupervised/pretrain manner (AE, VAE, etc) + * + * @return true if the layer can be pretrained (using fit(INDArray), false otherwise + */ + @Override + public boolean isPretrainLayer() { + return false; + } + + /** + * + */ + @Override + public void clearNoiseWeightParams() { + + } + + public List variables() { + return variables; + } + + public List variables(boolean copy) { + if (copy) { + return variables(); + } + return variables; + } + + /** + * The configuration for the neural network + * + * @return the configuration for the neural network + */ + @Override + public NeuralNetConfiguration getNetConfiguration() { + return layerConfiguration.getNetConfiguration(); + } + + public void addVariable(String variable) { + if (!variables.contains(variable)) { + variables.add(variable); + } + } + + /** + * Return the configuration of this layer + * + * @return the configuration + */ + @Override + public LayerConfiguration getLayerConfiguration() { + return layerConf(); + } + + public void setLayerConfiguration(LayerConfiguration layerConfiguration) { + this.layerConfiguration = (LayerConf_T) layerConfiguration; + } + + @Override + public void setCacheMode(CacheMode mode) { + if (mode == null) { + mode = CacheMode.NONE; } - @Override - public void setCacheMode(CacheMode mode) { - if (mode == null) - mode = CacheMode.NONE; + this.cacheMode = mode; + } - this.cacheMode = mode; + public LayerConf_T layerConf() { + return this.layerConfiguration; + } + + @Override + public TrainingConfig getConfig() { + return layerConfiguration; + } + + protected String layerId() { + String name = this.layerConfiguration.getLayerName(); + return "(layer name: " + (name == null ? 
"\"\"" : name) + ", layer index: " + index + + ", layer type: " + + getClass().getSimpleName() + ")"; + } + + public INDArray getInput() { + return input; + } + + public int getEpochCount() { + return epochCount; + } + + public void setEpochCount(int epochCount) { + this.epochCount = epochCount; + } + + @Override + public void setInput(INDArray input, LayerWorkspaceMgr workspaceMgr) { + this.input = workspaceMgr.leverageTo(ArrayType.INPUT, input); + dropoutApplied = false; + } + + @Override + public int getIndex() { + return index; + } + + @Override + public void setIndex(int index) { + this.index = index; + } + + /** + * Returns the parameters of the neural network as a flattened row vector + * + * @return the parameters of the neural network + */ + @Override + public INDArray params() { + return null; + } + + protected void setParams(INDArray params, char order) { + throw new UnsupportedOperationException("Not supported"); + } + + /** + * @return Number of parameters + */ + @Override + public long numParams() { + return 0; + } + + protected void applyMask(INDArray to) { + to.muliColumnVector(maskArray.castTo(to.dataType())); + } + + @Override + public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) { + setInput(input, workspaceMgr); + return activate(training, workspaceMgr); + } + + @Override + public double calcRegularizationScore(boolean backpropParamsOnly) { + return 0.0; + } + + + @Deprecated + public void clear() { + input = null; + maskArray = null; + maskState = null; + if (layerConf().getIDropout() != null) { + layerConf().getIDropout().clear(); } + } - public LayerConfT layerConf() { - return (LayerConfT) this.conf.getLayer(); + protected void applyDropOutIfNecessary(boolean training, LayerWorkspaceMgr workspaceMgr) { + if (training && !dropoutApplied && layerConf().getIDropout() != null) { + INDArray result; + if (inputModificationAllowed) { + result = input; + } else { + result = workspaceMgr.createUninitialized(ArrayType.INPUT, input.dataType(), input.shape(), + input.ordering()); + } + + input = layerConf().getIDropout() + .applyDropout(input, result, getIterationCount(), getEpochCount(), workspaceMgr); + dropoutApplied = true; } + } - @Override - public TrainingConfig getConfig(){ - return conf.getLayer(); + protected INDArray backpropDropOutIfPresent(INDArray epsilon) { + if (layerConf().getIDropout() != null) { + layerConf().getIDropout().backprop(epsilon, epsilon, getIterationCount(), getEpochCount()); } + return epsilon; + } - protected String layerId() { - String name = this.conf().getLayer().getLayerName(); - return "(layer name: " + (name == null ? 
"\"\"" : name) + ", layer index: " + index + ", layer type: " + - getClass().getSimpleName() + ")"; + + @Override + public Type type() { + return Type.FEED_FORWARD; + } + + + public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { + throw new UnsupportedOperationException("Not supported"); + } + + + public Pair gradientAndScore() { + return new Pair<>(gradient(), score()); + } + + @Override + public int getInputMiniBatchSize() { + return (int) input.size(0); + } + + @Override + public void setInputMiniBatchSize(int size) { + } + + @Override + public INDArray getMaskArray() { + return maskArray; + } + + @Override + public void setMaskArray(INDArray maskArray) { + this.maskArray = maskArray; + } + + @Override + public Pair feedForwardMaskArray(INDArray maskArray, + MaskState currentMaskState, int minibatchSize) { + //Most layers: CNN, dense, activation, etc - set mask array, mask state and then leave the mask unmodified + + this.maskArray = maskArray; + this.maskState = currentMaskState; + + return new Pair<>(maskArray, currentMaskState); + } + + + public Gradient gradient() { + throw new UnsupportedOperationException( + "Not supported for this layer, or should be overridden for layers requiring it"); + } + + + public void fit() { + throw new UnsupportedOperationException( + "Not supported for this layer, or should be overridden for layers requiring it"); + } + + + public double score() { + throw new UnsupportedOperationException( + "Not supported for this layer, or should be overridden for layers requiring it"); + } + + + public void applyConstraints(int iteration, int epoch) { + if (layerConf().getConstraints() != null) { + for (LayerConstraint lc : layerConf().getConstraints()) { + lc.applyConstraint(this, iteration, epoch); + } } + } - public INDArray getInput() { - return input; + public void assertInputSet(boolean backprop) { + if (input == null) { + if (backprop) { + throw new IllegalStateException( + "Cannot perform backprop in layer " + getClass().getSimpleName() + + ": layer input field is not set"); + } else { + throw new IllegalStateException( + "Cannot perform forward pass in layer " + getClass().getSimpleName() + + ": layer input field is not set"); + } } + } - public int getEpochCount() { - return epochCount; - } + @Override + public void allowInputModification(boolean allow) { + inputModificationAllowed = allow; + } - public void setEpochCount(int epochCount) { - this.epochCount = epochCount; - } + @Override + public LayerHelper getHelper() { + //Layers with helpers should override this method! + return null; + } - /** - * Init the model - */ - @Override - public void init() { + @Override + public boolean updaterDivideByMinibatch(String paramName) { + //Majority of params's gradients should be... Exception: batch norm mean/variance estimate + return true; + } - } - - @Override - public void setInput(INDArray input, LayerWorkspaceMgr workspaceMgr) { - this.input = workspaceMgr.leverageTo(ArrayType.INPUT, input); - dropoutApplied = false; - } - - @Override - public int getIndex() { - return index; - } - - @Override - public void setIndex(int index) { - this.index = index; - } - - - @Override - public Collection getListeners() { - return trainingListeners; - } - - @Override - public void setListeners(Collection listeners) { - this.trainingListeners = listeners != null ? listeners : new ArrayList(); - } - - /** - * This method ADDS additional TrainingListener to existing listeners - * - * @param listeners - */ - @Override - public void addListeners(TrainingListener... 
listeners) { - if (this.trainingListeners == null) { - setListeners(listeners); - return; - } - - Collections.addAll(trainingListeners, listeners); - } - - @Override - public void setListeners(TrainingListener... listeners) { - setListeners(Arrays.asList(listeners)); - } - - @Override - public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public void update(Gradient gradient) { - throw new UnsupportedOperationException(); - } - - @Override - public void update(INDArray gradient, String paramType) { - throw new UnsupportedOperationException(); - } - - - @Override - public ConvexOptimizer getOptimizer() { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public void setConf(NeuralNetConfiguration conf) { - this.conf = conf; - } - - /**Returns the parameters of the neural network as a flattened row vector - * @return the parameters of the neural network - */ - @Override - public INDArray params() { - return null; - } - - @Override - public INDArray getParam(String param) { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public void setParam(String key, INDArray val) { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public void setParams(INDArray params) { - if (params != null) { - throw new UnsupportedOperationException("Not supported"); - } - } - - protected void setParams(INDArray params, char order) { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public void setParamsViewArray(INDArray params) { - if (params != null) { - throw new UnsupportedOperationException("Not supported"); - } - } - - @Override - public INDArray getGradientsViewArray() { - return null; - } - - @Override - public void setBackpropGradientsViewArray(INDArray gradients) { - if (gradients != null) { - throw new UnsupportedOperationException("Not supported"); - } - } - - @Override - public void setParamTable(Map paramTable) { - if (paramTable != null && !paramTable.isEmpty()) { - throw new UnsupportedOperationException("Not supported"); - } - } - - @Override - public Map paramTable() { - return paramTable(false); - } - - @Override - public Map paramTable(boolean backpropParamsOnly) { - return Collections.emptyMap(); - } - - protected void applyMask(INDArray to) { - to.muliColumnVector(maskArray.castTo(to.dataType())); - } - - @Override - public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) { - setInput(input, workspaceMgr); - return activate(training, workspaceMgr); - } - - @Override - public double calcRegularizationScore(boolean backpropParamsOnly){ - return 0.0; - } - - @Override - public int batchSize() { - return (int) input.size(0); - } - - @Override - public NeuralNetConfiguration conf() { - return conf; - } - - - @Override - public void clear() { - input = null; - maskArray = null; - maskState = null; - if(layerConf().getIDropout() != null){ - layerConf().getIDropout().clear(); - } - } - - protected void applyDropOutIfNecessary(boolean training, LayerWorkspaceMgr workspaceMgr){ - if(training && !dropoutApplied && layerConf().getIDropout() != null ){ - INDArray result; - if(inputModificationAllowed){ - result = input; - } else { - result = workspaceMgr.createUninitialized(ArrayType.INPUT, input.dataType(), input.shape(), input.ordering()); - } - - input = layerConf().getIDropout().applyDropout(input, result, getIterationCount(), getEpochCount(), 
workspaceMgr); - dropoutApplied = true; - } - } - - protected INDArray backpropDropOutIfPresent(INDArray epsilon){ - if(layerConf().getIDropout() != null ){ - layerConf().getIDropout().backprop(epsilon, epsilon, getIterationCount(), getEpochCount()); - } - return epsilon; - } - - - @Override - public Type type() { - return Type.FEED_FORWARD; - } - - /** - * The number of parameters for the model - * - * @return the number of parameters for the model - */ - @Override - public long numParams() { - return 0; - } - - @Override - public long numParams(boolean backwards) { - return numParams(); - } - - @Override - public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { - throw new UnsupportedOperationException("Not supported"); - } - - - @Override - public Pair gradientAndScore() { - return new Pair<>(gradient(), score()); - } - - @Override - public INDArray input() { - return input; - } - - @Override - public void setInputMiniBatchSize(int size) {} - - @Override - public int getInputMiniBatchSize() { - return (int) input.size(0); - } - - @Override - public void setMaskArray(INDArray maskArray) { - this.maskArray = maskArray; - } - - @Override - public INDArray getMaskArray() { - return maskArray; - } - - - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { - //Most layers: CNN, dense, activation, etc - set mask array, mask state and then leave the mask unmodified - - this.maskArray = maskArray; - this.maskState = currentMaskState; - - return new Pair<>(maskArray, currentMaskState); - } - - - @Override - public Gradient gradient() { - throw new UnsupportedOperationException( - "Not supported for this layer, or should be overridden for layers requiring it"); - } - - @Override - public void fit() { - throw new UnsupportedOperationException( - "Not supported for this layer, or should be overridden for layers requiring it"); - } - - @Override - public double score() { - throw new UnsupportedOperationException( - "Not supported for this layer, or should be overridden for layers requiring it"); - } - - - @Override - public void applyConstraints(int iteration, int epoch){ - if(layerConf().getConstraints() != null){ - for(LayerConstraint lc : layerConf().getConstraints()){ - lc.applyConstraint(this, iteration, epoch); - } - } - } - - public void assertInputSet(boolean backprop){ - if(input == null){ - if(backprop){ - throw new IllegalStateException("Cannot perform backprop in layer " + getClass().getSimpleName() - + ": layer input field is not set"); - } else { - throw new IllegalStateException("Cannot perform forward pass in layer " + getClass().getSimpleName() - + ": layer input field is not set"); - } - } - } - - @Override - public void allowInputModification(boolean allow){ - inputModificationAllowed = allow; - } - - @Override - public LayerHelper getHelper() { - //Layers with helpers should override this method! - return null; - } - - @Override - public boolean updaterDivideByMinibatch(String paramName) { - //Majority of params's gradients should be... 
Exception: batch norm mean/variance estimate - return true; - } - - @Override - public void close(){ - //No-op for individual layers - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java index f83b1cf31..7043275a0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java @@ -22,6 +22,7 @@ package org.deeplearning4j.nn.layers; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.buffer.DataType; @@ -33,7 +34,7 @@ import org.deeplearning4j.nn.workspace.ArrayType; public class ActivationLayer extends AbstractLayer { - public ActivationLayer(NeuralNetConfiguration conf, DataType dataType) { + public ActivationLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java index ed1176133..68de26b7c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java @@ -20,10 +20,21 @@ package org.deeplearning4j.nn.layers; +import java.lang.reflect.Constructor; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import lombok.Getter; +import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.DefaultParamInitializer; @@ -31,421 +42,650 @@ import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.Solver; import org.deeplearning4j.optimize.api.ConvexOptimizer; +import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.common.primitives.Pair; +import org.nd4j.evaluation.IEvaluation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.transforms.custom.LayerNorm; import org.nd4j.linalg.api.ops.impl.transforms.custom.LayerNormBp; +import org.nd4j.linalg.dataset.api.DataSet; +import org.nd4j.linalg.dataset.api.MultiDataSet; +import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.learning.regularization.Regularization; -import org.nd4j.common.primitives.Pair; - -import java.lang.reflect.Constructor; -import java.util.*; /** * A layer with parameters + * * @author Adam Gibson */ @Slf4j public abstract class BaseLayer - extends AbstractLayer { + extends 
AbstractLayer { - protected INDArray paramsFlattened; - protected INDArray gradientsFlattened; - protected Map params; - protected transient Map gradientViews; - protected double score = 0.0; - protected ConvexOptimizer optimizer; - protected Gradient gradient; - protected Solver solver; + /** + * This method executes evaluation of the model against given iterator and evaluation + * implementations + * + * @param iterator + * @param evaluations + */ + @Override + public T[] doEvaluation(DataSetIterator iterator, T... evaluations) { + return null; + } - protected Map weightNoiseParams = new HashMap<>(); + /** + * This method executes evaluation of the model against given iterator and evaluation + * implementations + * + * @param iterator + * @param evaluations + */ + @Override + public T[] doEvaluation(MultiDataSetIterator iterator, T... evaluations) { + return null; + } - public BaseLayer(NeuralNetConfiguration conf, DataType dataType) { - super(conf, dataType); + /** + * @param netConfiguration + */ + @Override + public void setNetConfiguration(@NonNull NeuralNetConfiguration netConfiguration) { + + } + + /** + * Init the model + */ + @Override + public void init() { + + } + + /** + * This method ADDS additional TrainingListener to existing listeners + * + * @param listener + */ + @Override + public void addListeners(TrainingListener... listener) { + + } + + /** + * Update layer weights and biases with gradient change + * + * @param gradient + */ + @Override + public void update(Gradient gradient) { + + } + + /** + * Perform one update applying the gradient + * + * @param gradient the gradient to apply + * @param paramType + */ + @Override + public void update(INDArray gradient, String paramType) { + + } + + /** + * the number of parameters for the model + * + * @param backwards + * @return the number of parameters for the model + */ + @Override + public long numParams(boolean backwards) { + return 0; + } + + /** + * Set the parameters for this model. This expects a linear ndarray which then be unpacked + * internally relative to the expected ordering of the model + * + * @param params the parameters for the model + */ + @Override + public void setParams(INDArray params) { + + } + + /** + * The current inputs batch size + * + * @return the current inputs batch size + */ + @Override + public int batchSize() { + return 0; + } + + /** + * The input/feature matrix for the model + * + * @return the input/feature matrix for the model + */ + @Override + public INDArray input() { + return null; + } + + /** + * Get a parameter array for a given parameter type key + * + * @param param the key of the parameter + * @return ndarray of parameters + */ + @Override + public INDArray getParam(String param) { + return null; + } + + /** + * Set the {@link TrainingListener}s for this model. If any listeners have previously been set, + * they will be replaced by this method + * + * @param listeners + */ + @Override + public void setListeners(TrainingListener... listeners) { + + } + + /** + * Set the parameters for a given parameter type. 
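The stubbed setParam(String, INDArray) documented here is expected to copy new values into the existing parameter view rather than replace the reference, matching the removed implementation further down (params.get(key).assign(val)). A minimal sketch under that assumption; "layer" and the key constant are illustrative, not part of the patch:

    // Sketch only: assumes setParam assigns into the existing view that backs the flattened params.
    INDArray w = layer.getParam(DefaultParamInitializer.WEIGHT_KEY);
    layer.setParam(DefaultParamInitializer.WEIGHT_KEY, w.mul(0.5));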
+ * + * @param key the param type key to set + * @param val the new parameters ndarray + */ + @Override + public void setParam(String key, INDArray val) { + + } + + /** + * + */ + @Override + public void close() { + + } + + /** + * This method fits model with a given DataSet + * + * @param dataSet + */ + @Override + public void fit(DataSet dataSet) { + + } + + /** + * This method fits model with a given MultiDataSet + * + * @param dataSet + */ + @Override + public void fit(MultiDataSet dataSet) { + + } + + /** + * This method fits model with a given DataSetIterator + * + * @param iterator + */ + @Override + public void fit(DataSetIterator iterator) { + + } + + /** + * This method fits model with a given MultiDataSetIterator + * + * @param iterator + */ + @Override + public void fit(MultiDataSetIterator iterator) { + + } + + /** + * This method returns updater state (if applicable), null otherwise + * + * @return + */ + @Override + public INDArray updaterState() { + return null; + } + + protected double score = 0.0; + protected ConvexOptimizer optimizer; + protected Gradient gradient; + protected Solver solver; + protected Map weightNoiseParams = new HashMap<>(); + protected INDArray paramsFlattened; + protected INDArray gradientsFlattened; + /** + * Full table of parameters + */ + protected Map paramsTable; + @Getter protected transient Map gradientViews; + + public BaseLayer(LayerConfiguration conf, DataType dataType) { + super(conf, dataType); + } + + + /** + * and others even use \epsilon (epsilon) + * http://web.cs.swarthmore.edu/~meeden/cs81/s10/BackPropDeriv.pdf + * + * @param epsilon w^(L+1)*delta^(L+1). Or, equiv: dC/da, i.e., (dC/dz)*(dz/da) = dC/da, where + * C is cost function a=sigma(z) is activation. + * @param workspaceMgr Workspace manager + * @return + */ + @Override + public Pair backpropGradient(INDArray epsilon, + LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + //If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent) + Pair zAndPreNorm = preOutputWithPreNorm(true, true, workspaceMgr); + INDArray z = zAndPreNorm.getFirst(); //Note: using preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag + INDArray preNorm = zAndPreNorm.getSecond(); + INDArray delta = layerConf().getActivationFn().backprop(z, epsilon) + .getFirst(); //TODO handle activation function params + + if (maskArray != null) { + applyMask(delta); } - public LayerConfT layerConf() { - return (LayerConfT) this.conf.getLayer(); + Gradient ret = new DefaultGradient(); + + if (hasBias()) { + INDArray biasGrad = gradientViews.get(DefaultParamInitializer.BIAS_KEY); + delta.sum(biasGrad, 0); //biasGrad is initialized/zeroed first + ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad); } - @Override - public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(true); - //If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent) - Pair zAndPreNorm = preOutputWithPreNorm(true, true, workspaceMgr); - INDArray z = zAndPreNorm.getFirst(); //Note: using preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag - INDArray preNorm = zAndPreNorm.getSecond(); - INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params + INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr); - if (maskArray != null) { - applyMask(delta); + 
INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, + delta.dataType(), new long[]{W.size(0), delta.size(0)}, 'f'); + if (hasLayerNorm()) { + INDArray g = getParam(DefaultParamInitializer.GAIN_KEY); + + INDArray dldg = gradientViews.get(DefaultParamInitializer.GAIN_KEY); + Nd4j.getExecutioner().exec(new LayerNormBp(preNorm, g, delta, delta, dldg, true, 1)); + ret.gradientForVariable().put(DefaultParamInitializer.GAIN_KEY, dldg); + + } + + epsilonNext = W.mmuli(delta.transpose(), epsilonNext) + .transpose(); //W.mmul(delta.transpose()).transpose(); + + INDArray weightGrad = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY); //f order + Nd4j.gemm(input.castTo(weightGrad.dataType()), delta, weightGrad, true, false, 1.0, + 0.0); //TODO avoid castTo? + ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad); + + weightNoiseParams.clear(); + + epsilonNext = backpropDropOutIfPresent(epsilonNext); + return new Pair<>(ret, epsilonNext); + } + + + public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) { + if (this.input == null) { + log.warn("There is no input for this layer '{}'", layerConfiguration); + return; + } + INDArray output = activate(true, workspaceMgr); + setScoreWithZ(output); + } + + + protected void setScoreWithZ(INDArray z) { + } + + /** + * Objective function: the specified objective + * + * @return the score for the objective + */ + + @Override + public double score() { + return score; + } + + @Override + public Gradient gradient() { + return gradient; + } + + + @Override + public ConvexOptimizer getOptimizer() { + if (optimizer == null) { + Solver solver = new Solver.Builder().model(this).configure(getNetConfiguration()).build(); + this.optimizer = solver.getOptimizer(); + } + return optimizer; + } + + /** + * Returns the parameters of the neural network as a flattened row vector + * + * @return the parameters of the neural network + */ + @Override + public INDArray params() { + return paramsFlattened; + } + + + public void setParamsTable(INDArray paramsTable) { + if (paramsTable == paramsFlattened) { + return; //no op + } + setParams(paramsTable, 'f'); + } + + protected void setParams(INDArray params, char order) { + List parameterList = layerConfiguration.getVariables(); //netWideVariables(); + int length = 0; + for (String s : parameterList) { + length += getParam(s).length(); + } + if (params.length() != length) { + throw new IllegalArgumentException("Unable to set parameters: must be of length " + length + + ", got params of length " + params.length() + " - " + layerId()); + } + int idx = 0; + Set paramKeySet = this.getParamTable().keySet(); + for (String s : paramKeySet) { + INDArray param = getParam(s); + INDArray get = params.get(NDArrayIndex.point(0), + NDArrayIndex.interval(idx, idx + param.length())); + if (param.length() != get.length()) { + throw new IllegalStateException( + "Parameter " + s + " should have been of length " + param.length() + + " but was " + get.length() + " - " + layerId()); } + param.assign(get.reshape(order, + param.shape())); //Use assign due to backprop params being a view of a larger array + idx += param.length(); + } + } - Gradient ret = new DefaultGradient(); + @Override + public void setParamsViewArray(INDArray params) { + if (this.paramsTable != null && params.length() != numParams()) { + throw new IllegalArgumentException("Invalid input: expect params of length " + numParams() + + ", got params of length " + params.length() + " - " + layerId()); + } - if(hasBias()){ - 
INDArray biasGrad = gradientViews.get(DefaultParamInitializer.BIAS_KEY); - delta.sum(biasGrad, 0); //biasGrad is initialized/zeroed first - ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad); + this.paramsFlattened = params; + } + + @Override + public INDArray getGradientsViewArray() { + return gradientsFlattened; + } + + @Override + public void setBackpropGradientsViewArray(INDArray gradients) { + if (this.paramsTable != null && gradients.length() != numParams()) { + throw new IllegalArgumentException( + "Invalid input: expect gradients array of length " + numParams(true) + + ", got array of length " + gradients.length() + " - " + layerId()); + } + + this.gradientsFlattened = gradients; + this.gradientViews = layerConfiguration.initializer() + .getGradientsFromFlattened(layerConfiguration, gradients); + } + + /** + * Get the parameter, after applying any weight noise (such as DropConnect) if necessary. Note + * that during training, this will store the post-noise parameters, as these should be used for + * both forward pass and backprop, for a single iteration. Consequently, the parameters (post + * noise) should be cleared after each training iteration + * + * @param param Parameter key + * @param training If true: during training + * @return The parameter, after applying any noise + */ + protected INDArray getParamWithNoise(String param, boolean training, + LayerWorkspaceMgr workspaceMgr) { + INDArray p; + if (layerConf().getWeightNoise() != null) { + if (training && weightNoiseParams.size() > 0 && weightNoiseParams.containsKey(param)) { + //Re-use these weights for both forward pass and backprop - don't want to use 2 different params here + //These should be cleared during backprop + return weightNoiseParams.get(param); + } else { + try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + p = layerConf().getWeightNoise() + .getParameter(this, param, getIterationCount(), getEpochCount(), training, + workspaceMgr); } + } - INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr); - - INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, delta.dataType(), new long[]{W.size(0), delta.size(0)}, 'f'); - if(hasLayerNorm()) { - INDArray g = getParam(DefaultParamInitializer.GAIN_KEY); - - INDArray dldg = gradientViews.get(DefaultParamInitializer.GAIN_KEY); - Nd4j.getExecutioner().exec(new LayerNormBp(preNorm, g, delta, delta, dldg, true, 1)); - ret.gradientForVariable().put(DefaultParamInitializer.GAIN_KEY, dldg); - - } - - epsilonNext = W.mmuli(delta.transpose(),epsilonNext).transpose(); //W.mmul(delta.transpose()).transpose(); - - INDArray weightGrad = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY); //f order - Nd4j.gemm(input.castTo(weightGrad.dataType()), delta, weightGrad, true, false, 1.0, 0.0); //TODO avoid castTo? 
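For reference, both the reformatted backpropGradient above and the removed lines it replaces encode the standard dense-layer identities; the following only restates what the code already computes, with x the layer input, z the pre-activation and epsilon the incoming gradient:

    delta       = activationFn'(z) (*) epsilon     // layerConf().getActivationFn().backprop(z, epsilon)
    dL/db       = column sums of delta             // delta.sum(biasGrad, 0)
    dL/dW       = x^T * delta                      // Nd4j.gemm(input, delta, weightGrad, true, false, 1.0, 0.0)
    epsilonNext = (W * delta^T)^T = delta * W^T    // W.mmuli(delta.transpose(), epsilonNext).transpose()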
- ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad); - - weightNoiseParams.clear(); - - epsilonNext = backpropDropOutIfPresent(epsilonNext); - return new Pair<>(ret, epsilonNext); + if (training) { + //Store for re-use in backprop + weightNoiseParams.put(param, p); + } + } else { + return getParam(param); } - public void fit() { - throw new UnsupportedOperationException("Not supported"); + return p; + } + + protected INDArray preOutput(boolean training, LayerWorkspaceMgr workspaceMgr) { + return preOutputWithPreNorm(training, false, workspaceMgr).getFirst(); + } + + protected Pair preOutputWithPreNorm(boolean training, boolean forBackprop, + LayerWorkspaceMgr workspaceMgr) { + assertInputSet(forBackprop); + applyDropOutIfNecessary(training, workspaceMgr); + INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr); + INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr); + INDArray g = (hasLayerNorm() ? getParam(DefaultParamInitializer.GAIN_KEY) : null); + + INDArray input = this.input.castTo(dataType); + + //Input validation: + if (input.rank() != 2 || input.columns() != W.rows()) { + if (input.rank() != 2) { + throw new DL4JInvalidInputException( + "Input that is not a matrix; expected matrix (rank 2), got rank " + + input.rank() + " array with shape " + Arrays.toString(input.shape()) + + ". Missing preprocessor or wrong input type? " + layerId()); + } + throw new DL4JInvalidInputException( + "Input size (" + input.columns() + " columns; shape = " + Arrays.toString(input.shape()) + + ") is invalid: does not match layer input size (layer # inputs = " + + W.size(0) + ") " + layerId()); } - @Override - public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) { - if (this.input == null) - return; + INDArray ret = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, W.dataType(), + input.size(0), W.size(1)); + input.castTo(ret.dataType()).mmuli(W, + ret); //TODO Can we avoid this cast? (It sohuld be a no op if not required, however) - INDArray output = activate(true, workspaceMgr); - setScoreWithZ(output); + INDArray preNorm = ret; + if (hasLayerNorm()) { + preNorm = (forBackprop ? 
ret.dup(ret.ordering()) : ret); + Nd4j.getExecutioner().exec(new LayerNorm(preNorm, g, ret, true, 1)); } - - protected void setScoreWithZ(INDArray z) {} - - /** - * Objective function: the specified objective - * @return the score for the objective - */ - - @Override - public double score() { - return score; + if (hasBias()) { + ret.addiRowVector(b); } - @Override - public Gradient gradient() { - return gradient; + if (maskArray != null) { + applyMask(ret); } - @Override - public void update(Gradient gradient) { - for (String paramType : gradient.gradientForVariable().keySet()) { - update(gradient.getGradientFor(paramType), paramType); - } + return new Pair<>(ret, preNorm); + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + INDArray z = preOutput(training, workspaceMgr); + INDArray ret = layerConf().getActivationFn().getActivation(z, training); + + if (maskArray != null) { + applyMask(ret); } - @Override - public void update(INDArray gradient, String paramType) { - setParam(paramType, getParam(paramType).addi(gradient)); + return ret; + } + + @Override + public double calcRegularizationScore(boolean backpropParamsOnly) { + double scoreSum = 0.0; + for (Map.Entry e : paramsTable.entrySet()) { + List l = layerConf().getRegularizationByParam(e.getKey()); + if (l == null || l.isEmpty()) { + continue; + } + for (Regularization r : l) { + scoreSum += r.score(e.getValue(), getIterationCount(), getEpochCount()); + } } + return scoreSum; + } + + @Override + public Layer clone() { + Layer layer = null; + try { + Constructor c = getClass().getConstructor(NeuralNetConfiguration.class); + layer = (Layer) c.newInstance(layerConfiguration); + Map linkedTable = new LinkedHashMap<>(); + for (Map.Entry entry : paramsTable.entrySet()) { + linkedTable.put(entry.getKey(), entry.getValue().dup()); + } + layer.setParamTable(linkedTable); + } catch (Exception e) { + log.error("", e); + } + + return layer; + + } + + + @Override + public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { + if (input != null) { + setInput(input, workspaceMgr); + applyDropOutIfNecessary(true, workspaceMgr); + } + if (solver == null) { + solver = new Solver.Builder().model(this).configure(getNetConfiguration()) + .listeners(getTrainingListeners()).build(); + } + this.optimizer = solver.getOptimizer(); + solver.optimize(workspaceMgr); + } @Override - public ConvexOptimizer getOptimizer() { - if (optimizer == null) { - Solver solver = new Solver.Builder().model(this).configure(conf()).build(); - this.optimizer = solver.getOptimizer(); - } - return optimizer; - } + public String toString() { + return getClass().getName() + "{" + "conf=" + layerConfiguration + ", score=" + score + + ", optimizer=" + optimizer + ", listeners=" + trainingListeners + '}'; + } - /**Returns the parameters of the neural network as a flattened row vector - * @return the parameters of the neural network - */ - @Override - public INDArray params() { - return paramsFlattened; - } + @Override + public void clear() { + super.clear(); + weightNoiseParams.clear(); + } - @Override - public INDArray getParam(String param) { - return params.get(param); - } + @Override + public void clearNoiseWeightParams() { + weightNoiseParams.clear(); + } - @Override - public void setParam(String key, INDArray val) { - if (params.containsKey(key)) - params.get(key).assign(val); - else - params.put(key, val); - } + /** + * Does this layer have no bias term? 
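Subclasses that support a no-bias mode or layer normalization are the ones expected to override the two hooks documented in the surrounding Javadoc. A hypothetical override, only to illustrate the pattern (MyDenseLikeLayer and its configuration type are made up; the real dense layer does the equivalent against its own config):

    public class MyDenseLikeLayer extends BaseLayer<MyDenseLikeLayerConfiguration> {
        public MyDenseLikeLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); }
        @Override public boolean hasBias()      { return layerConf().hasBias(); }      // config decides
        @Override public boolean hasLayerNorm() { return layerConf().hasLayerNorm(); } // config decides
    }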
Many layers (dense, convolutional, output, embedding) have + * biases by default, but no-bias versions are possible via configuration + * + * @return True if a bias term is present, false otherwise + */ + public boolean hasBias() { + //Overridden by layers supporting no bias mode: dense, output, convolutional, embedding + return true; + } - @Override - public void setParams(INDArray params) { - if (params == paramsFlattened) - return; //no op - setParams(params, 'f'); - } + /** + * Does this layer support and is it enabled layer normalization? Only Dense and SimpleRNN Layers + * support layer normalization. + * + * @return True if layer normalization is enabled on this layer, false otherwise + */ + public boolean hasLayerNorm() { + // Overridden by layers supporting layer normalization. + return false; + } - protected void setParams(INDArray params, char order) { - List parameterList = conf.variables(); - int length = 0; - for (String s : parameterList) - length += getParam(s).length(); - if (params.length() != length) - throw new IllegalArgumentException("Unable to set parameters: must be of length " + length - + ", got params of length " + params.length() + " - " + layerId()); - int idx = 0; - Set paramKeySet = this.params.keySet(); - for (String s : paramKeySet) { - INDArray param = getParam(s); - INDArray get = params.get(NDArrayIndex.point(0), NDArrayIndex.interval(idx, idx + param.length())); - if (param.length() != get.length()) - throw new IllegalStateException("Parameter " + s + " should have been of length " + param.length() - + " but was " + get.length() + " - " + layerId()); - param.assign(get.reshape(order, param.shape())); //Use assign due to backprop params being a view of a larger array - idx += param.length(); - } - } + /** + * The number of parameters (all types) for the model + * + * @return the number of parameters for the model + */ + public long numParams() { + int ret = 0; + for (INDArray val : paramsTable.values()) { + ret += val.length(); + } + return ret; + } - @Override - public void setParamsViewArray(INDArray params) { - if (this.params != null && params.length() != numParams()) - throw new IllegalArgumentException("Invalid input: expect params of length " + numParams() - + ", got params of length " + params.length() + " - " + layerId()); + /** + * Return a map of all parameters in the network. Parameter names are as described in + * {@link #getParam(String)}. 
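Because the count is now derived from the parameter table rather than cached separately, the relationship between getParamTable() and numParams() can be stated directly; a sketch only, mirroring the loop in numParams() above:

    long n = 0;
    for (INDArray v : layer.getParamTable().values()) {
        n += v.length();              // numParams() is the sum of all parameter array lengths
    }
    // n == layer.numParams()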
As per {@link #getParam(String)} the returned arrays are views - + * modifications to these will impact the underlying network parameters + * + * @return A map of all parameters in the network + */ + @Override + public Map getParamTable() { + return getParamTable(false); + } - this.paramsFlattened = params; - } + /** + * Set the full table of parameters (of all types) + * + * @param paramTable ndarray parameters table + */ + @Override + public void setParamTable(@NonNull Map paramTable) { + this.paramsTable = paramTable; + } - @Override - public INDArray getGradientsViewArray() { - return gradientsFlattened; - } - - @Override - public void setBackpropGradientsViewArray(INDArray gradients) { - if (this.params != null && gradients.length() != numParams()) - throw new IllegalArgumentException("Invalid input: expect gradients array of length " + numParams(true) - + ", got array of length " + gradients.length() + " - " + layerId()); - - this.gradientsFlattened = gradients; - this.gradientViews = conf.getLayer().initializer().getGradientsFromFlattened(conf, gradients); - } - - @Override - public void setParamTable(Map paramTable) { - this.params = paramTable; - } - - @Override - public Map paramTable() { - return paramTable(false); - } - - @Override - public Map paramTable(boolean backpropParamsOnly) { - return params; - } - - /** - * Get the parameter, after applying any weight noise (such as DropConnect) if necessary. - * Note that during training, this will store the post-noise parameters, as these should be used - * for both forward pass and backprop, for a single iteration. - * Consequently, the parameters (post noise) should be cleared after each training iteration - * - * @param param Parameter key - * @param training If true: during training - * @return The parameter, after applying any noise - */ - protected INDArray getParamWithNoise(String param, boolean training, LayerWorkspaceMgr workspaceMgr){ - INDArray p; - if(layerConf().getWeightNoise() != null){ - if(training && weightNoiseParams.size() > 0 && weightNoiseParams.containsKey(param) ){ - //Re-use these weights for both forward pass and backprop - don't want to use 2 different params here - //These should be cleared during backprop - return weightNoiseParams.get(param); - } else { - try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - p = layerConf().getWeightNoise().getParameter(this, param, getIterationCount(), getEpochCount(), training, workspaceMgr); - } - } - - if(training){ - //Store for re-use in backprop - weightNoiseParams.put(param, p); - } - } else { - return getParam(param); - } - - return p; - } - - protected INDArray preOutput(boolean training, LayerWorkspaceMgr workspaceMgr) { - return preOutputWithPreNorm(training, false, workspaceMgr).getFirst(); - } - - protected Pair preOutputWithPreNorm(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(forBackprop); - applyDropOutIfNecessary(training, workspaceMgr); - INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr); - INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr); - INDArray g = (hasLayerNorm() ? 
getParam(DefaultParamInitializer.GAIN_KEY) : null); - - INDArray input = this.input.castTo(dataType); - - //Input validation: - if (input.rank() != 2 || input.columns() != W.rows()) { - if (input.rank() != 2) { - throw new DL4JInvalidInputException("Input that is not a matrix; expected matrix (rank 2), got rank " - + input.rank() + " array with shape " + Arrays.toString(input.shape()) - + ". Missing preprocessor or wrong input type? " + layerId()); - } - throw new DL4JInvalidInputException( - "Input size (" + input.columns() + " columns; shape = " + Arrays.toString(input.shape()) - + ") is invalid: does not match layer input size (layer # inputs = " - + W.size(0) + ") " + layerId()); - } - - - INDArray ret = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, W.dataType(), input.size(0), W.size(1)); - input.castTo(ret.dataType()).mmuli(W, ret); //TODO Can we avoid this cast? (It sohuld be a no op if not required, however) - - INDArray preNorm = ret; - if(hasLayerNorm()){ - preNorm = (forBackprop ? ret.dup(ret.ordering()) : ret); - Nd4j.getExecutioner().exec(new LayerNorm(preNorm, g, ret, true, 1)); - } - - if(hasBias()){ - ret.addiRowVector(b); - } - - if (maskArray != null) { - applyMask(ret); - } - - return new Pair<>(ret, preNorm); - } - - @Override - public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { - INDArray z = preOutput(training, workspaceMgr); - INDArray ret = layerConf().getActivationFn().getActivation(z, training); - - if (maskArray != null) { - applyMask(ret); - } - - return ret; - } - - @Override - public double calcRegularizationScore(boolean backpropParamsOnly){ - double scoreSum = 0.0; - for (Map.Entry e : paramTable().entrySet()) { - List l = layerConf().getRegularizationByParam(e.getKey()); - if(l == null || l.isEmpty()){ - continue; - } - for(Regularization r : l){ - scoreSum += r.score(e.getValue(), getIterationCount(), getEpochCount()); - } - } - return scoreSum; - } - - @Override - public Layer clone() { - Layer layer = null; - try { - Constructor c = getClass().getConstructor(NeuralNetConfiguration.class); - layer = (Layer) c.newInstance(conf); - Map linkedTable = new LinkedHashMap<>(); - for (Map.Entry entry : params.entrySet()) { - linkedTable.put(entry.getKey(), entry.getValue().dup()); - } - layer.setParamTable(linkedTable); - } catch (Exception e) { - log.error("",e); - } - - return layer; - - } - - /** - * The number of parameters for the model - * - * @return the number of parameters for the model - */ - @Override - public long numParams() { - int ret = 0; - for (INDArray val : params.values()) - ret += val.length(); - return ret; - } - - @Override - public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { - if (input != null) { - setInput(input, workspaceMgr); - applyDropOutIfNecessary(true, workspaceMgr); - } - if (solver == null) { - solver = new Solver.Builder().model(this).configure(conf()).listeners(getListeners()).build(); - } - this.optimizer = solver.getOptimizer(); - solver.optimize(workspaceMgr); - } - - @Override - public String toString() { - return getClass().getName() + "{" + "conf=" + conf + ", score=" + score - + ", optimizer=" + optimizer + ", listeners=" + trainingListeners + '}'; - } - - @Override - public void clear(){ - super.clear(); - weightNoiseParams.clear(); - } - - @Override - public void clearNoiseWeightParams(){ - weightNoiseParams.clear(); - } - - /** - * Does this layer have no bias term? 
Many layers (dense, convolutional, output, embedding) have biases by - * default, but no-bias versions are possible via configuration - * - * @return True if a bias term is present, false otherwise - */ - public boolean hasBias(){ - //Overridden by layers supporting no bias mode: dense, output, convolutional, embedding - return true; - } - - /** - * Does this layer support and is it enabled layer normalization? Only Dense and SimpleRNN Layers support - * layer normalization. - * - * @return True if layer normalization is enabled on this layer, false otherwise - */ - public boolean hasLayerNorm(){ - // Overridden by layers supporting layer normalization. - return false; - } + @Override + public Map getParamTable(boolean backpropParamsOnly) { + return paramsTable; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java index 1f317eee6..b06c9a3ed 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.layers; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.DefaultParamInitializer; @@ -58,7 +59,7 @@ public abstract class BaseOutputLayer { - public BasePretrainNetwork(NeuralNetConfiguration conf, DataType dataType) { + public BasePretrainNetwork(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } @@ -119,52 +120,47 @@ public abstract class BasePretrainNetwork paramTable(boolean backpropParamsOnly) { + public Map getParamTable(boolean backpropParamsOnly) { if (!backpropParamsOnly) - return params; + return getParamTable(); Map map = new LinkedHashMap<>(); - map.put(PretrainParamInitializer.WEIGHT_KEY, params.get(PretrainParamInitializer.WEIGHT_KEY)); - map.put(PretrainParamInitializer.BIAS_KEY, params.get(PretrainParamInitializer.BIAS_KEY)); + map.put(PretrainParamInitializer.WEIGHT_KEY, super.getParamTable().get(PretrainParamInitializer.WEIGHT_KEY)); + map.put(PretrainParamInitializer.BIAS_KEY, super.getParamTable().get(PretrainParamInitializer.BIAS_KEY)); return map; } - - public INDArray params() { - return paramsFlattened; - } - /**The number of parameters for the model, for backprop (i.e., excluding visible bias) * @return the number of parameters for the model (ex. visible bias) */ public long numParams() { int ret = 0; - for (Map.Entry entry : params.entrySet()) { + for (Map.Entry entry : getParamTable().entrySet()) { ret += entry.getValue().length(); } return ret; } @Override - public void setParams(INDArray params) { - if (params == paramsFlattened) + public void setParamsTable(INDArray paramsTable) { + if (paramsTable == paramsFlattened) return; //No op //SetParams has two different uses: during pretrain vs. backprop. //pretrain = 3 sets of params (inc. 
visible bias); backprop = 2 - List parameterList = conf.variables(); + List parameterList = layerConfiguration.getVariables(); long paramLength = 0; for (String s : parameterList) { val len = getParam(s).length(); paramLength += len; } - if (params.length() != paramLength) { + if (paramsTable.length() != paramLength) { throw new IllegalArgumentException("Unable to set parameters: must be of length " + paramLength - + ", got params of length " + params.length() + " " + layerId()); + + ", got params of length " + paramsTable.length() + " " + layerId()); } // Set for backprop and only W & hb - paramsFlattened.assign(params); + paramsFlattened.assign(paramsTable); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java index 743186706..c89165431 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.layers; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.buffer.DataType; @@ -31,7 +32,7 @@ import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class DropoutLayer extends BaseLayer { - public DropoutLayer(NeuralNetConfiguration conf, DataType dataType) { + public DropoutLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java index 1e6c60add..75bf8ae01 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java @@ -49,8 +49,8 @@ public class FrozenLayer extends BaseWrapperLayer { throw new IllegalArgumentException("Output Layers are not allowed to be frozen " + layerId()); } this.zeroGradient = new DefaultGradient(insideLayer.params()); - if (insideLayer.paramTable() != null) { - for (String paramType : insideLayer.paramTable().keySet()) { + if (insideLayer.getParamTable() != null) { + for (String paramType : insideLayer.getParamTable().keySet()) { //save memory?? zeroGradient.setGradientFor(paramType, null); } @@ -63,7 +63,7 @@ public class FrozenLayer extends BaseWrapperLayer { } protected String layerId() { - String name = underlying.conf().getLayer().getLayerName(); + String name = underlying.getLayerConfiguration().getLayerName(); return "(layer name: " + (name == null ? 
"\"\"" : name) + ", layer index: " + underlying.getIndex() + ")"; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java index 918a21a4a..425ec454f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java @@ -46,7 +46,7 @@ public class FrozenLayerWithBackprop extends BaseWrapperLayer { } protected String layerId() { - String name = underlying.conf().getLayer().getLayerName(); + String name = underlying.getLayerConfiguration().getLayerName(); return "(layer name: " + (name == null ? "\"\"" : name) + ", layer index: " + underlying.getIndex() + ")"; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java index e53fc6619..e13a06219 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java @@ -24,6 +24,7 @@ package org.deeplearning4j.nn.layers; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.optimize.Solver; @@ -54,7 +55,7 @@ public class LossLayer extends BaseLayer { - public OutputLayer(NeuralNetConfiguration conf, DataType dataType) { + public OutputLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java index 84dd1fd1f..c36bf9366 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java @@ -25,6 +25,7 @@ import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.workspace.ArrayType; @@ -38,7 +39,7 @@ import java.util.Arrays; public class RepeatVector extends AbstractLayer { - public RepeatVector(NeuralNetConfiguration conf, DataType dataType) { + public RepeatVector(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cnn3DLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cnn3DLossLayer.java index b63978b10..2fcdcd17a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cnn3DLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cnn3DLossLayer.java @@ -28,6 +28,7 @@ import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.api.layers.IOutputLayer; 
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Convolution3D; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseLayer; @@ -49,7 +50,19 @@ public class Cnn3DLossLayer extends BaseLayer { private final int[] cropping; //[padTop, padBottom] - public Cropping1DLayer(NeuralNetConfiguration conf, DataType dataType) { + public Cropping1DLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); - this.cropping = ((org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D) conf.getLayer()).getCropping(); + this.cropping = layerConfiguration.getCropping(); } @Override @@ -79,7 +80,8 @@ public class Cropping1DLayer extends AbstractLayer { @Override public Layer clone() { - return new Cropping2DLayer(conf.clone(), dataType); + + return new Cropping2DLayer(layerConfiguration.clone(), dataType); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java index 3d6beac05..d72d2f3eb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java @@ -24,6 +24,7 @@ import lombok.val; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; @@ -40,11 +41,12 @@ public class Cropping2DLayer extends AbstractLayer { - public Deconvolution3DLayer(NeuralNetConfiguration conf, DataType dataType) { + public Deconvolution3DLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } @@ -135,7 +136,7 @@ public class Deconvolution3DLayer extends BaseLayer { boolean ncdhw = layerConf().getDataFormat() == Convolution3D.DataFormat.NCDHW; int chDim = ncdhw ? 
1 : 4; if (input.size(chDim) != layerConf().getNIn() ) { - String layerName = conf.getLayer().getLayerName(); + String layerName = getLayerConfiguration().getLayerName(); if (layerName == null) layerName = "(not named)"; throw new DL4JInvalidInputException("Cannot do forward pass in Deconvolution3D layer (layer name = " + layerName diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java index c63aeb3f9..888875129 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java @@ -26,6 +26,7 @@ import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.DepthwiseConvolutionParamInitializer; @@ -45,7 +46,7 @@ import java.util.Arrays; public class DepthwiseConvolution2DLayer extends ConvolutionLayer { - public DepthwiseConvolution2DLayer(NeuralNetConfiguration conf, DataType dataType) { + public DepthwiseConvolution2DLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } @@ -152,7 +153,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { getParamWithNoise(DepthwiseConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr); if (input.rank() != 4) { - String layerName = conf.getLayer().getLayerName(); + String layerName = layerConfiguration.getLayerName(); if (layerName == null) layerName = "(not named)"; throw new DL4JInvalidInputException("Got rank " + input.rank() @@ -174,7 +175,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { long outDepth = depthMultiplier * inDepth; if (input.size(nchw ? 
1 : 3) != inDepth) { - String layerName = conf.getLayer().getLayerName(); + String layerName = layerConfiguration.getLayerName(); if (layerName == null) layerName = "(not named)"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java index d5d0ebf0f..d205017bf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java @@ -26,6 +26,7 @@ import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; @@ -46,7 +47,7 @@ import java.util.Arrays; public class SeparableConvolution2DLayer extends ConvolutionLayer { - public SeparableConvolution2DLayer(NeuralNetConfiguration conf, DataType dataType) { + public SeparableConvolution2DLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } @@ -176,7 +177,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { int wIdx = 3; if (input.rank() != 4) { - String layerName = conf.getLayer().getLayerName(); + String layerName = getLayerConfiguration().getLayerName(); if (layerName == null) layerName = "(not named)"; throw new DL4JInvalidInputException("Got rank " + input.rank() @@ -193,7 +194,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { long outDepth = pointWiseWeights.size(0); if (input.size(chIdx) != inDepth) { - String layerName = conf.getLayer().getLayerName(); + String layerName = getLayerConfiguration().getLayerName(); if (layerName == null) layerName = "(not named)"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java index 6abd39baa..fb824dfa3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java @@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; @@ -42,7 +43,7 @@ import java.util.Arrays; @Slf4j public class SpaceToBatch extends AbstractLayer { - public SpaceToBatch(NeuralNetConfiguration conf, DataType dataType) { + public SpaceToBatch(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepth.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepth.java index aa0fd2ebb..32a74d80d 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepth.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepth.java @@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; @@ -43,7 +44,7 @@ import java.util.Arrays; @Slf4j public class SpaceToDepth extends AbstractLayer { - public SpaceToDepth(NeuralNetConfiguration conf, DataType dataType) { + public SpaceToDepth(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java index 386c312e6..f6bfcb3cf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.layers.convolution; import lombok.val; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; @@ -38,9 +39,9 @@ public class ZeroPadding1DLayer extends AbstractLayer { - public ZeroPaddingLayer(NeuralNetConfiguration conf, DataType dataType) { + public ZeroPaddingLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } @@ -115,7 +116,7 @@ public class ZeroPaddingLayer extends AbstractLayer { - public Upsampling2D(NeuralNetConfiguration conf, DataType dataType) { + public Upsampling2D(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java index 0df9431c7..9fa3e6365 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java @@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; @@ -44,7 +45,7 @@ import java.util.Arrays; public class Upsampling3D extends AbstractLayer { - public Upsampling3D(NeuralNetConfiguration conf, DataType dataType) { + public Upsampling3D(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/PReLU.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/PReLU.java index fcf624932..8a715f100 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/PReLU.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/PReLU.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.layers.feedforward; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseLayer; @@ -38,7 +39,7 @@ public class PReLU extends BaseLayer { - public AutoEncoder(NeuralNetConfiguration conf, DataType dataType) { + public AutoEncoder(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java index d2aa10406..362fb8db8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.layers.feedforward.dense; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.layers.BaseLayer; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -31,7 +32,7 @@ import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; */ public class DenseLayer extends BaseLayer { - public DenseLayer(NeuralNetConfiguration conf, DataType dataType) { + public DenseLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/elementwise/ElementWiseMultiplicationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/elementwise/ElementWiseMultiplicationLayer.java index b65172652..f799b7373 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/elementwise/ElementWiseMultiplicationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/elementwise/ElementWiseMultiplicationLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.layers.feedforward.elementwise; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.params.ElementWiseParamInitializer; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -38,7 +39,7 @@ import java.util.Arrays; public class ElementWiseMultiplicationLayer extends BaseLayer { - public ElementWiseMultiplicationLayer(NeuralNetConfiguration conf, DataType dataType){ + public ElementWiseMultiplicationLayer(LayerConfiguration conf, DataType dataType){ super(conf, dataType); } @@ -68,7 +69,7 @@ public class ElementWiseMultiplicationLayer extends BaseLayer { private static final int[] DIM_1 = new int[]{1}; - public EmbeddingLayer(NeuralNetConfiguration conf, DataType dataType) { + public EmbeddingLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingSequenceLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingSequenceLayer.java index 762407264..6760caa1a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingSequenceLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingSequenceLayer.java @@ -25,6 +25,7 @@ import lombok.val; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseLayer; @@ -45,7 +46,7 @@ import static org.nd4j.linalg.api.shape.Shape.hasDefaultStridesForShape; public class EmbeddingSequenceLayer extends BaseLayer { private static final int[] WEIGHT_DIM = new int[]{1}; - public EmbeddingSequenceLayer(NeuralNetConfiguration conf, DataType dataType) { + public EmbeddingSequenceLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLSTMHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLSTMHelper.java index a8803eda2..c5b159fb9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLSTMHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLSTMHelper.java @@ -118,7 +118,7 @@ public class MKLDNNLSTMHelper implements LSTMHelper { if(prevMemCellState != null) args.add(prevMemCellState); - IActivation a = ((LSTM)conf.getLayer()).getActivationFn(); + IActivation a = ((LSTM)layer.getLayerConfiguration()).getActivationFn(); DynamicCustomOp op = DynamicCustomOp.builder("lstmLayer") .addInputs(args.toArray(new INDArray[0])) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java index 6d4b65b0b..75bd4866d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java @@ -25,6 +25,7 @@ import lombok.val; import org.deeplearning4j.common.config.DL4JClassLoading; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseLayer; @@ -47,7 +48,6 @@ import org.nd4j.linalg.exception.ND4JOpProfilerException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; import org.nd4j.common.primitives.Pair; -import org.nd4j.common.util.OneTimeLogger; import java.util.*; @@ -63,7 +63,7 @@ public class BatchNormalization extends BaseLayer getListeners() { - return listeners; - } @Override public void setListeners(TrainingListener... 
listeners) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java index 9bfa02687..957a56632 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java @@ -26,6 +26,7 @@ import org.deeplearning4j.common.config.DL4JClassLoading; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; @@ -54,11 +55,12 @@ public class LocalResponseNormalization @Override public Layer clone() { - return new LocalResponseNormalization(conf.clone(), dataType); + return new LocalResponseNormalization(getLayerConfiguration().clone(), dataType); } - public LocalResponseNormalization(NeuralNetConfiguration conf, DataType dataType) { + public LocalResponseNormalization(LayerConfiguration conf, DataType dataType) { super(conf, dataType); + layerConfiguration = (org.deeplearning4j.nn.conf.layers.LocalResponseNormalization) conf; initializeHelper(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java index 69016dca4..e5f0fbf1e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java @@ -25,6 +25,7 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; @@ -68,7 +69,7 @@ public class Yolo2OutputLayer extends AbstractLayer pair = getGradientsAndDelta(preOutput2d(true, workspaceMgr), workspaceMgr); //Returns Gradient and delta^(this), not Gradient and epsilon^(this-1) //150 - long inputShape = (( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) this.getConf().getLayer()).getNIn(); + long inputShape = (( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) getLayerConfiguration()).getNIn(); INDArray delta = pair.getSecond(); //4 x 150 INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), new long[]{inputShape, delta.length()}, 'f'); @@ -125,7 +126,7 @@ public class OCNNOutputLayer extends BaseOutputLayer paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { return PARAM_KEYS; } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { return WEIGHT_KEYS; } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { return Collections.emptyList(); } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean 
isWeightParam(LayerConfiguration layer, String key) { return WEIGHT_KEYS.contains(key); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return false; } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) conf.getLayer(); + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) conf; Map params = Collections.synchronizedMap(new LinkedHashMap()); val nIn = ocnnOutputLayer.getNIn(); int hiddenLayer = ocnnOutputLayer.getHiddenSize(); @@ -133,8 +127,8 @@ public class OCNNParamInitializer extends DefaultParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { - org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) conf.getLayer(); + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { + org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) conf; Map params = Collections.synchronizedMap(new LinkedHashMap()); val nIn = ocnnOutputLayer.getNIn(); val hiddenLayer = ocnnOutputLayer.getHiddenSize(); @@ -155,11 +149,11 @@ public class OCNNParamInitializer extends DefaultParamInitializer { } - protected INDArray createWeightMatrix(NeuralNetConfiguration configuration, + protected INDArray createWeightMatrix(LayerConfiguration configuration, INDArray weightParamView, boolean initializeParameters) { - org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) configuration.getLayer(); + org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) configuration; IWeightInit weightInit = ocnnOutputLayer.getWeightInitFn(); if (initializeParameters) { INDArray ret = weightInit.init(weightParamView.size(0), //Fan in diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java index e8b8ae9a3..b342f5032 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java @@ -20,11 +20,13 @@ package org.deeplearning4j.nn.layers.pooling; +import java.util.Map; import lombok.val; import org.apache.commons.lang3.ArrayUtils; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.PoolingType; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -54,11 +56,11 @@ public class GlobalPoolingLayer extends AbstractLayer paramTable) { + throw new RuntimeException("Not implemented."); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BaseRecurrentLayer.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BaseRecurrentLayer.java index 6b02b6c6e..1216c9d8c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BaseRecurrentLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BaseRecurrentLayer.java @@ -24,6 +24,7 @@ import org.deeplearning4j.nn.api.layers.RecurrentLayer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.layers.BaseLayer; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.api.buffer.DataType; @@ -50,7 +51,7 @@ public abstract class BaseRecurrentLayer gradientViews; @@ -65,14 +75,25 @@ public class BidirectionalLayer implements RecurrentLayer { private INDArray outFwd; private INDArray outBwd; - public BidirectionalLayer(@NonNull NeuralNetConfiguration conf, @NonNull Layer fwd, @NonNull Layer bwd, @NonNull INDArray paramsView) { - this.conf = conf; + public BidirectionalLayer(@NonNull LayerConfiguration conf, @NonNull Layer fwd, @NonNull Layer bwd, @NonNull INDArray paramsView) { + this.layerConfiguration = conf; + this.conf = conf.getNetConfiguration(); this.fwd = fwd; this.bwd = bwd; - this.layerConf = (Bidirectional) conf.getLayer(); + this.layerConf = (Bidirectional) layerConfiguration; this.paramsView = paramsView; } + /** + * Return the configuration of this layer + * + * @return the configuration + */ + @Override + public LayerConfiguration getLayerConfiguration() { + return layerConf; + } + private RNNFormat getRNNDataFormat(){ return layerConf.getRNNDataFormat(); } @@ -283,7 +304,7 @@ public class BidirectionalLayer implements RecurrentLayer { @Override public TrainingConfig getConfig() { - return conf.getLayer(); + return layerConfiguration; } @Override @@ -349,13 +370,23 @@ public class BidirectionalLayer implements RecurrentLayer { } @Override - public NeuralNetConfiguration conf() { + public NeuralNetConfiguration getNetConfiguration() { return conf; } + /** + * @param netConfiguration + */ @Override - public void setConf(NeuralNetConfiguration conf) { - this.conf = conf; + public void setNetConfiguration(@NonNull NeuralNetConfiguration netConfiguration) { + + } + + + public void setLayerConfiguration(LayerConfiguration layerConfiguration) { + this.layerConfiguration = layerConfiguration; + this.layerConf = (Bidirectional) layerConfiguration; + this.conf = layerConfiguration.getNetConfiguration(); } @Override @@ -363,11 +394,86 @@ public class BidirectionalLayer implements RecurrentLayer { return input; } + /** + * This method returns updater state (if applicable), null otherwise + * + * @return + */ + @Override + public INDArray updaterState() { + return null; + } + @Override public ConvexOptimizer getOptimizer() { return null; } + /** + * This method fits model with a given DataSet + * + * @param dataSet + */ + @Override + public void fit(DataSet dataSet) { + + } + + /** + * This method fits model with a given MultiDataSet + * + * @param dataSet + */ + @Override + public void fit(MultiDataSet dataSet) { + + } + + /** + * This method fits model with a given DataSetIterator + * + * @param iterator + */ + @Override + public void fit(DataSetIterator iterator) { + + } + + /** + * This method fits model with a given MultiDataSetIterator + * + * @param iterator + */ + @Override + public void 
fit(MultiDataSetIterator iterator) { + + } + + /** + * This method executes evaluation of the model against given iterator and evaluation + * implementations + * + * @param iterator + * @param evaluations + */ + @Override + public T[] doEvaluation(DataSetIterator iterator, T... evaluations) { + return null; + } + + /** + * This method executes evaluation of the model against given iterator and evaluation + * implementations + * + * @param iterator + * @param evaluations + */ + @Override + public T[] doEvaluation(MultiDataSetIterator iterator, + T... evaluations) { + return null; + } + @Override public INDArray getParam(String param) { String sub = param.substring(1); @@ -379,17 +485,17 @@ public class BidirectionalLayer implements RecurrentLayer { } @Override - public Map paramTable() { - return paramTable(false); + public Map getParamTable() { + return getParamTable(false); } @Override - public Map paramTable(boolean backpropParamsOnly) { + public Map getParamTable(boolean backpropParamsOnly) { Map m = new LinkedHashMap<>(); - for(Map.Entry e : fwd.paramTable(backpropParamsOnly).entrySet()){ + for(Map.Entry e : fwd.getParamTable(backpropParamsOnly).entrySet()){ m.put(BidirectionalParamInitializer.FORWARD_PREFIX + e.getKey(), e.getValue()); } - for(Map.Entry e : bwd.paramTable(backpropParamsOnly).entrySet()){ + for(Map.Entry e : bwd.getParamTable(backpropParamsOnly).entrySet()){ m.put(BidirectionalParamInitializer.BACKWARD_PREFIX + e.getKey(), e.getValue()); } return m; @@ -442,10 +548,9 @@ public class BidirectionalLayer implements RecurrentLayer { //No op } - @Override public void setListeners(Collection listeners) { - fwd.setListeners(listeners); - bwd.setListeners(listeners); + fwd.setListeners(listeners.toArray(new TrainingListener[]{})); + bwd.setListeners(listeners.toArray(new TrainingListener[]{})); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java index 99a2081dc..ac5c57165 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java @@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.GravesBidirectionalLSTMParamInitializer; @@ -41,7 +42,7 @@ public class GravesBidirectionalLSTM protected FwdPassReturn cachedPassForward; protected FwdPassReturn cachedPassBackward; - public GravesBidirectionalLSTM(NeuralNetConfiguration conf, DataType dataType) { + public GravesBidirectionalLSTM(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } @@ -74,7 +75,7 @@ public class GravesBidirectionalLSTM final FwdPassReturn fwdPass = activateHelperDirectional(true, null, null, true, true, workspaceMgr); fwdPass.fwdPassOutput = permuteIfNWC(fwdPass.fwdPassOutput); final Pair forwardsGradient = LSTMHelpers.backpropGradientHelper(this, - this.conf, + this.layerConfiguration.getNetConfiguration(), this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), 
getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), permuteIfNWC(epsilon), @@ -89,7 +90,7 @@ public class GravesBidirectionalLSTM final FwdPassReturn backPass = activateHelperDirectional(true, null, null, true, false, workspaceMgr); final Pair backwardsGradient = LSTMHelpers.backpropGradientHelper(this, - this.conf, + this.layerConfiguration.getNetConfiguration(), this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS), permuteIfNWC(epsilon), @@ -117,7 +118,7 @@ public class GravesBidirectionalLSTM final Gradient correctOrderedGradient = new DefaultGradient(); - for (final String key : params.keySet()) { + for (final String key : paramsTable.keySet()) { correctOrderedGradient.setGradientFor(key, combinedGradient.getGradientFor(key)); } @@ -155,7 +156,7 @@ public class GravesBidirectionalLSTM cachedPassForward = null; } else { - forwardsEval = LSTMHelpers.activateHelper(this, this.conf, this.layerConf().getGateActivationFn(), + forwardsEval = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS), training, null, null, @@ -163,7 +164,7 @@ public class GravesBidirectionalLSTM GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, maskArray, true, null, forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr, layerConf().isHelperAllowFallback()); - backwardsEval = LSTMHelpers.activateHelper(this, this.conf, this.layerConf().getGateActivationFn(), + backwardsEval = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS), @@ -215,7 +216,7 @@ public class GravesBidirectionalLSTM biasKey = GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS; } - FwdPassReturn ret = LSTMHelpers.activateHelper(this, this.conf, this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), + FwdPassReturn ret = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), getParam(recurrentKey), getParam(inputKey), getParam(biasKey), training, prevOutputActivations, prevMemCellState, forBackprop, forwards, inputKey, maskArray, true, null, forBackprop ? 
cacheMode : CacheMode.NONE, workspaceMgr, layerConf().isHelperAllowFallback()); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java index 1e37cfe32..5aedd780b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java @@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.GravesLSTMParamInitializer; import org.nd4j.common.base.Preconditions; @@ -40,7 +41,7 @@ public class GravesLSTM extends BaseRecurrentLayer p = LSTMHelpers.backpropGradientHelper(this, - this.conf, this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), + this.layerConfiguration.getNetConfiguration(), this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), recurrentWeights, inputWeights, permuteIfNWC(epsilon), truncatedBPTT, tbpttBackwardLength, fwdPass, true, GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, GravesLSTMParamInitializer.BIAS_KEY, gradientViews, maskArray, true, null, @@ -128,7 +129,7 @@ public class GravesLSTM extends BaseRecurrentLayer { @@ -45,7 +45,7 @@ public class LSTM extends BaseRecurrentLayer p = LSTMHelpers.backpropGradientHelper(this, - this.conf, this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), + getNetConfiguration(), this.layerConf().getGateActivationFn(), permuteIfNWC(this.input), recurrentWeights, inputWeights, permuteIfNWC(epsilon), truncatedBPTT, tbpttBackwardLength, fwdPass, true, LSTMParamInitializer.INPUT_WEIGHT_KEY, LSTMParamInitializer.RECURRENT_WEIGHT_KEY, LSTMParamInitializer.BIAS_KEY, gradientViews, null, false, helper, workspaceMgr, @@ -161,7 +161,7 @@ public class LSTM extends BaseRecurrentLayer= endIdx; iTimeIndex--) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LastTimeStepLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LastTimeStepLayer.java index 4656ce9d1..da5f0b782 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LastTimeStepLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LastTimeStepLayer.java @@ -57,7 +57,7 @@ public class LastTimeStepLayer extends BaseWrapperLayer { public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { long[] newEpsShape = origOutputShape; - boolean nwc = TimeSeriesUtils.getFormatFromRnnLayer(underlying.conf().getLayer()) == RNNFormat.NWC; + boolean nwc = TimeSeriesUtils.getFormatFromRnnLayer(underlying.getLayerConfiguration()) == RNNFormat.NWC; INDArray newEps = Nd4j.create(epsilon.dataType(), newEpsShape, 'f'); if(lastTimeStepIdxs == null){ //no mask case @@ -119,7 +119,7 @@ public class LastTimeStepLayer extends BaseWrapperLayer { "rank " + in.rank() + " with shape " + Arrays.toString(in.shape())); } origOutputShape = in.shape(); - boolean nwc = TimeSeriesUtils.getFormatFromRnnLayer(underlying.conf().getLayer()) == RNNFormat.NWC; + boolean nwc = 
TimeSeriesUtils.getFormatFromRnnLayer(underlying.getLayerConfiguration()) == RNNFormat.NWC; // underlying instanceof BaseRecurrentLayer && ((BaseRecurrentLayer)underlying).getDataFormat() == RNNFormat.NWC)|| // underlying instanceof MaskZeroLayer && ((MaskZeroLayer)underlying).getUnderlying() instanceof BaseRecurrentLayer && // ((BaseRecurrentLayer)((MaskZeroLayer)underlying).getUnderlying()).getDataFormat() == RNNFormat.NWC; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java index fb2117b9b..de9d75928 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java @@ -27,6 +27,7 @@ import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseLayer; @@ -47,7 +48,7 @@ import java.util.List; public class RnnLossLayer extends BaseLayer implements IOutputLayer { @Setter @Getter protected INDArray labels; - public RnnLossLayer(NeuralNetConfiguration conf, DataType dataType) { + public RnnLossLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java index edede1c1f..63f64e95e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java @@ -24,6 +24,7 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseOutputLayer; import org.deeplearning4j.nn.params.DefaultParamInitializer; @@ -40,7 +41,7 @@ import java.util.Arrays; public class RnnOutputLayer extends BaseOutputLayer { - public RnnOutputLayer(NeuralNetConfiguration conf, DataType dataType) { + public RnnOutputLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/SimpleRnn.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/SimpleRnn.java index 0176ce720..b0437fe1a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/SimpleRnn.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/SimpleRnn.java @@ -22,6 +22,7 @@ package org.deeplearning4j.nn.layers.recurrent; import lombok.val; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.SimpleRnnParamInitializer; 
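// ---------------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: how calling code reads a layer's
// settings and parameters after the renames shown in the hunks above
// (conf().getLayer() -> getLayerConfiguration(), paramTable(boolean) ->
// getParamTable(boolean)). Every type and accessor used here appears in this patch;
// the class LayerInspectionExample and its describe(...) method are hypothetical and
// exist only to show the migrated calls in one self-contained place.
// ---------------------------------------------------------------------------------
import java.util.Map;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.nd4j.linalg.api.ndarray.INDArray;

class LayerInspectionExample {

    static String describe(Layer layer) {
        // Per-layer settings now come straight from the LayerConfiguration instead of
        // going through NeuralNetConfiguration.getLayer():
        LayerConfiguration lconf = layer.getLayerConfiguration();
        String name = lconf.getLayerName() == null ? "(not named)" : lconf.getLayerName();

        // paramTable(boolean) is renamed to getParamTable(boolean); passing false keeps
        // the old "all parameters" semantics:
        Map<String, INDArray> params = layer.getParamTable(false);
        return name + ": " + params.size() + " parameter arrays";
    }
}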
@@ -47,7 +48,7 @@ public class SimpleRnn extends BaseRecurrentLayer paramTable(boolean backpropOnly) { + public Map getParamTable(boolean backpropOnly) { return paramTable; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java index 021c1a5aa..748f91564 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -24,6 +24,7 @@ import lombok.val; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -64,7 +65,7 @@ public class SameDiffLayer extends AbstractLayer { protected Map gradTable; - public SameDiffLayer(NeuralNetConfiguration conf, DataType dataType){ + public SameDiffLayer(LayerConfiguration conf, DataType dataType){ super(conf, dataType); } @@ -271,7 +272,7 @@ public class SameDiffLayer extends AbstractLayer { @Override public void setBackpropGradientsViewArray(INDArray gradients) { this.gradients = gradients; - this.gradTable = layerConf().initializer().getGradientsFromFlattened(conf(), gradients); + this.gradTable = layerConf().initializer().getGradientsFromFlattened(this.getLayerConfiguration(), gradients); } @Override @@ -286,12 +287,12 @@ public class SameDiffLayer extends AbstractLayer { } @Override - public Map paramTable() { - return paramTable(false); + public Map getParamTable() { + return getParamTable(false); } @Override - public Map paramTable(boolean backpropParamsOnly) { + public Map getParamTable(boolean backpropParamsOnly) { return paramTable; } @@ -301,7 +302,7 @@ public class SameDiffLayer extends AbstractLayer { sameDiff = SameDiff.create(); //Use SingleThreadArrayHolder so we can use views (also don't nede multithreading here, DL4J is not thread safe) sameDiff.setArrayHolders(new SingleThreadArrayHolder(), new SingleThreadArrayHolder(), false); - Map p = paramTable(); + Map p = getParamTable(); long[] inputShape = input.shape().clone(); inputShape[0] = -1; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java index 67cf9e648..d3cc93049 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java @@ -26,6 +26,7 @@ import lombok.val; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; @@ -67,7 +68,7 @@ public class SameDiffOutputLayer extends AbstractLayer gradTable; - public SameDiffOutputLayer(NeuralNetConfiguration conf, DataType dataType){ + public SameDiffOutputLayer(LayerConfiguration conf, DataType dataType){ 
super(conf, dataType); } @@ -277,7 +278,7 @@ public class SameDiffOutputLayer extends AbstractLayer paramTable() { - return paramTable(false); + public Map getParamTable() { + return getParamTable(false); } @Override - public Map paramTable(boolean backpropParamsOnly) { + public Map getParamTable(boolean backpropParamsOnly) { return paramTable; } @@ -307,7 +308,7 @@ public class SameDiffOutputLayer extends AbstractLayer p = paramTable(); + Map p = getParamTable(); long[] inputShape = input.shape().clone(); inputShape[0] = -1; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java index 4f85849e8..683f8b4d9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java @@ -22,6 +22,7 @@ package org.deeplearning4j.nn.layers.training; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseOutputLayer; @@ -39,7 +40,7 @@ public class CenterLossOutputLayer extends BaseOutputLayer { private final Gradient emptyGradient = new DefaultGradient(); - public MaskLayer(NeuralNetConfiguration conf, DataType dataType) { + public MaskLayer(LayerConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java index 75df1dfad..b3ff49db5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java @@ -27,6 +27,7 @@ import org.deeplearning4j.nn.api.TrainingConfig; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.variational.CompositeReconstructionDistribution; import org.deeplearning4j.nn.conf.layers.variational.LossFunctionWrapper; import org.deeplearning4j.nn.conf.layers.variational.ReconstructionDistribution; @@ -39,12 +40,17 @@ import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.Solver; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.evaluation.IEvaluation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.api.blas.Level1; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.dataset.api.DataSet; +import org.nd4j.linalg.dataset.api.MultiDataSet; +import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; import 
org.nd4j.linalg.exception.ND4JArraySizeException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.regularization.Regularization; @@ -65,7 +71,7 @@ public class VariationalAutoencoder implements Layer { protected Map params; @Getter protected transient Map gradientViews; - protected NeuralNetConfiguration conf; + protected double score = 0.0; protected ConvexOptimizer optimizer; protected Gradient gradient; @@ -91,27 +97,50 @@ public class VariationalAutoencoder implements Layer { @Getter @Setter protected int epochCount; - public VariationalAutoencoder(NeuralNetConfiguration conf, DataType dataType) { - this.conf = conf; + @Getter @Setter @NonNull + private LayerConfiguration layerConfiguration; + + public VariationalAutoencoder(@NonNull LayerConfiguration layerConfiguration, DataType dataType) { + this.layerConfiguration = layerConfiguration; this.dataType = dataType; this.encoderLayerSizes = - ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) conf.getLayer()) + ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) layerConfiguration) .getEncoderLayerSizes(); this.decoderLayerSizes = - ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) conf.getLayer()) + ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) layerConfiguration) .getDecoderLayerSizes(); this.reconstructionDistribution = - ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) conf.getLayer()) + ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) layerConfiguration) .getOutputDistribution(); - this.pzxActivationFn = ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) conf.getLayer()) + this.pzxActivationFn = ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) layerConfiguration) .getPzxActivationFn(); - this.numSamples = ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) conf.getLayer()) + this.numSamples = ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) layerConfiguration) .getNumSamples(); } protected org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder layerConf() { - return (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) conf().getLayer(); + return (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) layerConfiguration; + } + + /** + * Return the configuration of this layer + * + * @return the configuration + */ + @Override + public LayerConfiguration getLayerConfiguration() { + return layerConf(); + } + + /** + * Set a new layer configuration, new init() needs to be called afterwards. + * + * @param lconf layer configuration + */ + @Override + public void setLayerConfiguration(LayerConfiguration lconf) { + } @Override @@ -123,7 +152,7 @@ public class VariationalAutoencoder implements Layer { } protected String layerId() { - String name = this.conf().getLayer().getLayerName(); + String name = this.getLayerConfiguration().getLayerName(); return "(layer name: " + (name == null ? 
"\"\"" : name) + ", layer index: " + index + ")"; } @@ -470,9 +499,19 @@ public class VariationalAutoencoder implements Layer { return paramsFlattened; } + /** + * The param table + * + * @return + */ + @Override + public Map getParamTable() { + return null; + } + @Override public TrainingConfig getConfig() { - return conf.getLayer(); + return layerConfiguration; } @Override @@ -522,7 +561,7 @@ public class VariationalAutoencoder implements Layer { } this.gradientsFlattened = gradients; - this.gradientViews = conf.getLayer().initializer().getGradientsFromFlattened(conf, gradients); + this.gradientViews = layerConfiguration.initializer().getGradientsFromFlattened(this.layerConfiguration, gradients); } @Override @@ -548,14 +587,22 @@ public class VariationalAutoencoder implements Layer { return (int) input.size(0); } + /** + * The configuration for the neural network + * + * @return the configuration for the neural network + */ @Override - public NeuralNetConfiguration conf() { - return conf; + public NeuralNetConfiguration getNetConfiguration() { + return this.layerConfiguration.getNetConfiguration(); } + /** + * @param netConfiguration + */ @Override - public void setConf(NeuralNetConfiguration conf) { - this.conf = conf; + public void setNetConfiguration(@NonNull NeuralNetConfiguration netConfiguration) { + } @Override @@ -563,23 +610,94 @@ public class VariationalAutoencoder implements Layer { return input; } + /** + * This method returns updater state (if applicable), null otherwise + * + * @return + */ + @Override + public INDArray updaterState() { + return null; + } + @Override public ConvexOptimizer getOptimizer() { return optimizer; } + /** + * This method fits model with a given DataSet + * + * @param dataSet + */ + @Override + public void fit(DataSet dataSet) { + + } + + /** + * This method fits model with a given MultiDataSet + * + * @param dataSet + */ + @Override + public void fit(MultiDataSet dataSet) { + + } + + /** + * This method fits model with a given DataSetIterator + * + * @param iterator + */ + @Override + public void fit(DataSetIterator iterator) { + + } + + /** + * This method fits model with a given MultiDataSetIterator + * + * @param iterator + */ + @Override + public void fit(MultiDataSetIterator iterator) { + + } + + /** + * This method executes evaluation of the model against given iterator and evaluation + * implementations + * + * @param iterator + * @param evaluations + */ + @Override + public T[] doEvaluation(DataSetIterator iterator, T... evaluations) { + return null; + } + + /** + * This method executes evaluation of the model against given iterator and evaluation + * implementations + * + * @param iterator + * @param evaluations + */ + @Override + public T[] doEvaluation(MultiDataSetIterator iterator, + T... 
evaluations) { + return null; + } + @Override public INDArray getParam(String param) { return params.get(param); } - @Override - public Map paramTable() { - return new LinkedHashMap<>(params); - } @Override - public Map paramTable(boolean backpropParamsOnly) { + public Map getParamTable(boolean backpropParamsOnly) { Map map = new LinkedHashMap<>(); for (Map.Entry e : params.entrySet()) { if (!backpropParamsOnly || !isPretrainParam(e.getKey())) { @@ -601,8 +719,8 @@ public class VariationalAutoencoder implements Layer { @Override public void setParam(String key, INDArray val) { - if (paramTable().containsKey(key)) { - paramTable().get(key).assign(val); + if (getParamTable().containsKey(key)) { + getParamTable().get(key).assign(val); } else { throw new IllegalArgumentException("Unknown parameter: " + key + " - " + layerId()); } @@ -630,7 +748,7 @@ public class VariationalAutoencoder implements Layer { @Override public double calcRegularizationScore(boolean backpropParamsOnly){ double scoreSum = 0.0; - for (Map.Entry e : paramTable().entrySet()) { + for (Map.Entry e : getParamTable().entrySet()) { if(backpropParamsOnly && isPretrainParam(e.getKey())) continue; List l = layerConf().getRegularizationByParam(e.getKey()); @@ -799,7 +917,6 @@ public class VariationalAutoencoder implements Layer { setListeners(Arrays.asList(listeners)); } - @Override public void setListeners(Collection listeners) { if (trainingListeners == null) trainingListeners = new ArrayList<>(); @@ -905,7 +1022,7 @@ public class VariationalAutoencoder implements Layer { if (solver == null) { try (MemoryWorkspace workspace = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().model(this).configure(conf()).listeners(getListeners()).build(); + solver = new Solver.Builder().model(this).configure(getNetConfiguration()).listeners(getListeners()).build(); } } this.optimizer = solver.getOptimizer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java index 80439cbc5..d27d9cfbb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.layers.wrapper; +import java.util.Collection; +import java.util.Map; import lombok.Data; import lombok.NonNull; import org.deeplearning4j.nn.api.Layer; @@ -27,308 +29,321 @@ import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.api.TrainingConfig; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.layers.AbstractLayer; import org.deeplearning4j.nn.layers.LayerHelper; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; - -import java.util.Collection; -import java.util.Map; +import org.nd4j.linalg.api.ndarray.INDArray; @Data -public abstract class BaseWrapperLayer implements Layer { +public abstract class BaseWrapperLayer extends AbstractLayer { - protected Layer underlying; + protected Layer underlying; - public 
BaseWrapperLayer(@NonNull Layer underlying){ - this.underlying = underlying; - } - @Override - public void setCacheMode(CacheMode mode) { - underlying.setCacheMode(mode); - } + public BaseWrapperLayer(@NonNull Layer underlying) { + this.underlying = underlying; + } - @Override - public double calcRegularizationScore(boolean backpropParamsOnly){ - return underlying.calcRegularizationScore(backpropParamsOnly); - } + /** + * Return the configuration of this layer (which is the configuration of the underlying layer in + * this case + * + * @return the underlying layer configuration + */ + @Override + public LayerConfiguration getLayerConfiguration() { + return underlying.getLayerConfiguration(); + } - @Override - public Type type() { - return underlying.type(); - } + @Override + public void setLayerConfiguration(LayerConfiguration layerConfiguration) { + underlying.setLayerConfiguration(layerConfiguration); + } - @Override - public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { - return underlying.backpropGradient(epsilon, workspaceMgr); - } + @Override + public void setCacheMode(CacheMode mode) { + underlying.setCacheMode(mode); + } - @Override - public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { - return underlying.activate(training, workspaceMgr); - } + @Override + public double calcRegularizationScore(boolean backpropParamsOnly) { + return underlying.calcRegularizationScore(backpropParamsOnly); + } - @Override - public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) { - return underlying.activate(input, training, workspaceMgr); - } + @Override + public Type type() { + return underlying.type(); + } - @Override - public Collection getListeners() { - return underlying.getListeners(); - } + @Override + public Pair backpropGradient(INDArray epsilon, + LayerWorkspaceMgr workspaceMgr) { + return underlying.backpropGradient(epsilon, workspaceMgr); + } - @Override - public void setListeners(TrainingListener... listeners) { - underlying.setListeners(listeners); - } + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + return underlying.activate(training, workspaceMgr); + } - @Override - public void addListeners(TrainingListener... listener) { - underlying.addListeners(listener); - } + @Override + public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) { + return underlying.activate(input, training, workspaceMgr); + } - @Override - public void fit() { - underlying.fit(); - } + @Override + public Collection getListeners() { + return underlying.getListeners(); + } - @Override - public void update(Gradient gradient) { - underlying.update(gradient); - } + @Override + public void setListeners(TrainingListener... listeners) { + underlying.setListeners(listeners); + } - @Override - public void update(INDArray gradient, String paramType) { - underlying.update(gradient, paramType); - } + @Override + public void addListeners(TrainingListener... 
listener) { + underlying.addListeners(listener); + } - @Override - public double score() { - return underlying.score(); - } + @Override + public void fit() { + underlying.fit(); + } - @Override - public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) { - underlying.computeGradientAndScore(workspaceMgr); - } + @Override + public void update(Gradient gradient) { + underlying.update(gradient); + } - @Override - public INDArray params() { - return underlying.params(); - } + @Override + public void update(INDArray gradient, String paramType) { + underlying.update(gradient, paramType); + } - @Override - public long numParams() { - return underlying.numParams(); - } + @Override + public double score() { + return underlying.score(); + } - @Override - public long numParams(boolean backwards) { - return underlying.numParams(); - } + @Override + public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) { + underlying.computeGradientAndScore(workspaceMgr); + } - @Override - public void setParams(INDArray params) { - underlying.setParams(params); - } + @Override + public INDArray params() { + return underlying.params(); + } - @Override - public void setParamsViewArray(INDArray params) { - underlying.setParamsViewArray(params); - } + @Override + public long numParams() { + return underlying.numParams(); + } - @Override - public INDArray getGradientsViewArray() { - return underlying.getGradientsViewArray(); - } + @Override + public long numParams(boolean backwards) { + return underlying.numParams(); + } - @Override - public void setBackpropGradientsViewArray(INDArray gradients) { - underlying.setBackpropGradientsViewArray(gradients); - } + @Override + public void setParams(INDArray params) { + underlying.setParams(params); + } - @Override - public void fit(INDArray data, LayerWorkspaceMgr workspaceMgr) { - underlying.fit(data, workspaceMgr); - } + @Override + public void setParamsViewArray(INDArray params) { + underlying.setParamsViewArray(params); + } - @Override - public Gradient gradient() { - return underlying.gradient(); - } + @Override + public INDArray getGradientsViewArray() { + return underlying.getGradientsViewArray(); + } - @Override - public Pair gradientAndScore() { - return underlying.gradientAndScore(); - } + @Override + public void setBackpropGradientsViewArray(INDArray gradients) { + underlying.setBackpropGradientsViewArray(gradients); + } - @Override - public int batchSize() { - return underlying.batchSize(); - } + @Override + public void fit(INDArray data, LayerWorkspaceMgr workspaceMgr) { + underlying.fit(data, workspaceMgr); + } - @Override - public NeuralNetConfiguration conf() { - return underlying.conf(); - } + @Override + public Gradient gradient() { + return underlying.gradient(); + } - @Override - public void setConf(NeuralNetConfiguration conf) { - underlying.setConf(conf); - } + @Override + public Pair gradientAndScore() { + return underlying.gradientAndScore(); + } - @Override - public INDArray input() { - return underlying.input(); - } + @Override + public int batchSize() { + return underlying.batchSize(); + } - @Override - public ConvexOptimizer getOptimizer() { - return underlying.getOptimizer(); - } + @Override + public NeuralNetConfiguration getNetConfiguration() { + return underlying.getNetConfiguration(); + } - @Override - public INDArray getParam(String param) { - return underlying.getParam(param); - } + @Override + public INDArray input() { + return underlying.input(); + } - @Override - public Map paramTable() { - return 
underlying.paramTable(); - } + @Override + public ConvexOptimizer getOptimizer() { + return underlying.getOptimizer(); + } - @Override - public Map paramTable(boolean backpropParamsOnly) { - return underlying.paramTable(backpropParamsOnly); - } + @Override + public INDArray getParam(String param) { + return underlying.getParam(param); + } - @Override - public void setParamTable(Map paramTable) { - underlying.setParamTable(paramTable); - } + @Override + public Map getParamTable() { + return underlying.getParamTable(); + } - @Override - public void setParam(String key, INDArray val) { - underlying.setParam(key, val); - } + /** + * Setter for the param table + * + * @param paramTable Map<String, INDArray> + */ + @Override + public void setParamTable(Map paramTable) { + underlying.setParamTable(paramTable); + } - @Override - public void clear() { - underlying.clear(); - } + @Override + public Map getParamTable(boolean backpropParamsOnly) { + return underlying.getParamTable(backpropParamsOnly); + } - @Override - public void applyConstraints(int iteration, int epoch) { - underlying.applyConstraints(iteration, epoch); - } + @Override + public void setParam(String key, INDArray val) { + underlying.setParam(key, val); + } - @Override - public void init() { - underlying.init(); - } + @Override + public void clear() { + underlying.clear(); + } - @Override - public void setListeners(Collection listeners) { - underlying.setListeners(listeners); - } + @Override + public void applyConstraints(int iteration, int epoch) { + underlying.applyConstraints(iteration, epoch); + } - @Override - public void setIndex(int index) { - underlying.setIndex(index); - } + @Override + public void init() { + underlying.init(); + } - @Override - public int getIndex() { - return underlying.getIndex(); - } + @Override + public int getIndex() { + return underlying.getIndex(); + } - @Override - public int getIterationCount() { - return underlying.getIterationCount(); - } + @Override + public void setIndex(int index) { + underlying.setIndex(index); + } - @Override - public int getEpochCount() { - return underlying.getEpochCount(); - } + @Override + public int getIterationCount() { + return underlying.getIterationCount(); + } - @Override - public void setIterationCount(int iterationCount) { - underlying.setIterationCount(iterationCount); - } + @Override + public void setIterationCount(int iterationCount) { + underlying.setIterationCount(iterationCount); + } - @Override - public void setEpochCount(int epochCount) { - underlying.setEpochCount(epochCount); - } + @Override + public int getEpochCount() { + return underlying.getEpochCount(); + } - @Override - public void setInput(INDArray input, LayerWorkspaceMgr workspaceMgr) { - underlying.setInput(input, workspaceMgr); - } + @Override + public void setEpochCount(int epochCount) { + underlying.setEpochCount(epochCount); + } - @Override - public void setInputMiniBatchSize(int size) { - underlying.setInputMiniBatchSize(size); - } + @Override + public void setInput(INDArray input, LayerWorkspaceMgr workspaceMgr) { + underlying.setInput(input, workspaceMgr); + } - @Override - public int getInputMiniBatchSize() { - return underlying.getInputMiniBatchSize(); - } + @Override + public int getInputMiniBatchSize() { + return underlying.getInputMiniBatchSize(); + } - @Override - public void setMaskArray(INDArray maskArray) { - underlying.setMaskArray(maskArray); - } + @Override + public void setInputMiniBatchSize(int size) { + underlying.setInputMiniBatchSize(size); + } - @Override - public 
INDArray getMaskArray() { - return underlying.getMaskArray(); - } + @Override + public INDArray getMaskArray() { + return underlying.getMaskArray(); + } - @Override - public boolean isPretrainLayer() { - return underlying.isPretrainLayer(); - } + @Override + public void setMaskArray(INDArray maskArray) { + underlying.setMaskArray(maskArray); + } - @Override - public void clearNoiseWeightParams() { - underlying.clearNoiseWeightParams(); - } + @Override + public boolean isPretrainLayer() { + return underlying.isPretrainLayer(); + } - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { - return underlying.feedForwardMaskArray(maskArray, currentMaskState, minibatchSize); - } + @Override + public void clearNoiseWeightParams() { + underlying.clearNoiseWeightParams(); + } - @Override - public void allowInputModification(boolean allow) { - underlying.allowInputModification(allow); - } + @Override + public Pair feedForwardMaskArray(INDArray maskArray, + MaskState currentMaskState, int minibatchSize) { + return underlying.feedForwardMaskArray(maskArray, currentMaskState, minibatchSize); + } - @Override - public LayerHelper getHelper() { - return underlying.getHelper(); - } + @Override + public void allowInputModification(boolean allow) { + underlying.allowInputModification(allow); + } - @Override - public TrainingConfig getConfig() { - return underlying.getConfig(); - } + @Override + public LayerHelper getHelper() { + return underlying.getHelper(); + } - @Override - public boolean updaterDivideByMinibatch(String paramName) { - return underlying.updaterDivideByMinibatch(paramName); - } + @Override + public TrainingConfig getConfig() { + return underlying.getConfig(); + } - @Override - public void close(){ - //No-op for individual layers - } + @Override + public boolean updaterDivideByMinibatch(String paramName) { + return underlying.updaterDivideByMinibatch(paramName); + } + + @Override + public void close() { + //No-op for individual layers + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index 0f81392f9..7da7f837c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -21,12 +21,27 @@ package org.deeplearning4j.nn.multilayer; +import java.io.File; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; import lombok.Getter; import lombok.NonNull; import lombok.Setter; import lombok.extern.slf4j.Slf4j; import lombok.val; -import net.brutex.ai.dnn.api.INeuralNetwork; import net.brutex.ai.dnn.networks.ArtificialNeuralNetwork; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; @@ -34,13 +49,26 @@ import org.bytedeco.javacpp.Pointer; import org.deeplearning4j.datasets.iterator.MultiDataSetWrapperIterator; import org.deeplearning4j.exception.DL4JException; import org.deeplearning4j.exception.DL4JInvalidInputException; 
+import org.deeplearning4j.nn.api.Classifier; +import org.deeplearning4j.nn.api.FwdPassType; +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.api.MaskState; +import org.deeplearning4j.nn.api.ModelAdapter; +import org.deeplearning4j.nn.api.TrainingConfig; import org.deeplearning4j.nn.api.Updater; -import org.deeplearning4j.nn.api.*; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.api.layers.RecurrentLayer; -import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.BackpropType; +import org.deeplearning4j.nn.conf.CNN2DFormat; +import org.deeplearning4j.nn.conf.CacheMode; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetBaseBuilderConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.RNNFormat; +import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -57,9 +85,18 @@ import org.deeplearning4j.optimize.Solver; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.optimize.solvers.accumulation.GradientsAccumulator; -import org.deeplearning4j.util.*; +import org.deeplearning4j.util.Convolution1DUtils; +import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.CrashReportingUtil; +import org.deeplearning4j.util.ModelSerializer; +import org.deeplearning4j.util.NetworkUtils; +import org.deeplearning4j.util.OutputLayerUtil; +import org.jetbrains.annotations.NotNull; import org.nd4j.adapters.OutputAdapter; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.primitives.Triple; +import org.nd4j.common.util.OneTimeLogger; import org.nd4j.evaluation.IEvaluation; import org.nd4j.evaluation.classification.Evaluation; import org.nd4j.evaluation.classification.ROC; @@ -88,16 +125,10 @@ import org.nd4j.linalg.heartbeat.reports.Task; import org.nd4j.linalg.heartbeat.utils.EnvironmentUtils; import org.nd4j.linalg.heartbeat.utils.TaskUtils; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.nd4j.common.primitives.Triple; import org.nd4j.linalg.schedule.ISchedule; import org.nd4j.linalg.util.FeatureUtil; import org.nd4j.linalg.workspace.ND4JWorkspaceException; import org.nd4j.linalg.workspace.WorkspaceUtils; -import org.nd4j.common.util.OneTimeLogger; - -import java.io.*; -import java.util.*; /** * Artificial Neural Network An artificial neural network (1) takes some input data, and (2) @@ -115,8 +146,8 @@ import java.util.*; * weights (or parameters) so that predictions get more accurate. 
*/ @Slf4j -public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serializable, Classifier, Layer, - INeuralNetwork { +public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serializable, Classifier, + Layer { /** * Workspace for working memory for a single layer: forward pass and backward pass Note that this @@ -155,15 +186,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial .initialSize(0).overallocationLimit(0.05).policyReset(ResetPolicy.BLOCK_LEFT) .policyAllocation(AllocationPolicy.OVERALLOCATE).policySpill(SpillPolicy.REALLOCATE) .policyLearning(LearningPolicy.FIRST_LOOP).build(); + //the hidden neural network layers (including output layer) protected Layer[] layers; - protected LinkedHashMap layerMap = new LinkedHashMap<>(); + //Current training data: input features and labels protected INDArray input, labels; protected boolean initCalled = false; protected Collection trainingListeners = new ArrayList<>(); - protected NeuralNetConfiguration defaultConfiguration; - protected MultiLayerConfiguration layerWiseConfigurations; protected Gradient gradient; protected double score; @Setter @@ -174,7 +204,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial protected boolean clearTbpttState = true; //Mainly for unit testing (should be enabled otherwise) protected transient ThreadLocal lastEtlTime = new ThreadLocal<>(); protected INDArray mask; - protected int layerIndex; //For Layer.get/setIndex() + protected int layerIndex; //For LayerConfiguration.get/setIndex() protected transient Solver solver; //Used to call optimizers during backprop //Workspaces for CUDNN. Pass to LayerWorkspaceMgr for re-use in cudnn helpers @Getter @@ -183,27 +213,34 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial protected WorkspaceConfiguration WS_LAYER_ACT_X_CONFIG; - public MultiLayerNetwork(MultiLayerConfiguration conf) { - this.layerWiseConfigurations = conf; - this.defaultConfiguration = conf.getConf(0).clone(); + public MultiLayerNetwork(@NotNull NeuralNetConfiguration conf) { + super(conf); //Working memory: should learn over course of: (a) full forward pass, and (b) full backward pass //Working memory should be opened once per layer and once per preprocessor, for each of forward and backward passes - int numWorkingMem = 2 * (layerWiseConfigurations.getConfs().size() - + layerWiseConfigurations.getInputPreProcessors().size()); + int numWorkingMem = 2 * (conf.getFlattenedLayerConfigurations().size() + + conf.getInputPreProcessors().size()); WS_LAYER_WORKING_MEM_CONFIG = getLayerWorkingMemWSConfig(numWorkingMem); - WS_LAYER_ACT_X_CONFIG = getLayerActivationWSConfig(layerWiseConfigurations.getConfs().size()); + WS_LAYER_ACT_X_CONFIG = getLayerActivationWSConfig( + conf.getFlattenedLayerConfigurations().size()); + + init(); } + public MultiLayerNetwork(@NotNull NeuralNetBaseBuilderConfiguration conf) { + this(( NeuralNetConfiguration) conf); + } + + /** - * Initialize the network based on the configuration (a MultiLayerConfiguration in JSON format) - * and parameters array + * Initialize the network based on the configuration (a NeuralNetConfiguration in JSON format) and + * parameters array * * @param conf the configuration json * @param params the parameters for the network */ public MultiLayerNetwork(String conf, INDArray params) { - this(MultiLayerConfiguration.fromJson(conf)); + this(NeuralNetConfiguration.fromJson(conf)); init(); setParameters(params); } @@ -214,7 
+251,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * @param conf the configuration * @param params the parameters */ - public MultiLayerNetwork(MultiLayerConfiguration conf, INDArray params) { + public MultiLayerNetwork(NeuralNetConfiguration conf, INDArray params) { this(conf); init(); setParameters(params); @@ -261,6 +298,28 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return ModelSerializer.restoreMultiLayerNetwork(f, loadUpdater); } + /** + * Return the configuration of this layer + * + * @return the configuration + */ + @Override + public LayerConfiguration getLayerConfiguration() { + //TODO + throw new RuntimeException( + "getLayerConfiguration cannot be called on a MultiLayerNetwork. This function is here because of inheritance from Layer (which should be fixed)."); + } + + /** + * Set a new layer configuration, new init() needs to be called afterwards. + * + * @param lconf layer configuration + */ + @Override + public void setLayerConfiguration(LayerConfiguration lconf) { + throw new RuntimeException("setLayerConfiguration has no effect on a MultiLayerNetwork"); + } + /** * This method sets specified CacheMode for all layers within network * @@ -299,20 +358,6 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial lastEtlTime.set(time); } - protected void intializeConfigurations() { - if (layerWiseConfigurations == null) { - layerWiseConfigurations = new MultiLayerConfiguration.Builder().build(); - } - - if (layers == null) { - layers = new Layer[getnLayers()]; - } - - if (defaultConfiguration == null) { - defaultConfiguration = new NeuralNetConfiguration.Builder().build(); - } - } - /** * Perform layerwise pretraining for one epoch - see {@link #pretrain(DataSetIterator, int)} */ @@ -396,8 +441,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } - int ec = getLayer(layerIdx).conf().getEpochCount() + 1; - getLayer(layerIdx).conf().setEpochCount(ec); + int ec = getLayer(layerIdx).getNetConfiguration().getEpochCount() + 1; + getLayer(layerIdx).getNetConfiguration().setEpochCount(ec); } /** @@ -422,7 +467,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } LayerWorkspaceMgr workspaceMgr; - if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { workspaceMgr = LayerWorkspaceMgr.builder() @@ -450,12 +495,12 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } try (MemoryWorkspace ws = workspaceMgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) { - if (layerWiseConfigurations.getInputPreProcess(layerIdx) != null) { + if (getNetConfiguration().getInputPreProcess(layerIdx) != null) { if (input.size(0) > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); } - outputOfPrevLayer = layerWiseConfigurations.getInputPreProcess(layerIdx) + outputOfPrevLayer = getNetConfiguration().getInputPreProcess(layerIdx) .preProcess(outputOfPrevLayer, (int) input.size(0), LayerWorkspaceMgr.noWorkspaces(helperWorkspaces)); } @@ -475,16 +520,6 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return labels == null ? 
(int) input.size(0) : (int) labels.size(0); } - @Override - public NeuralNetConfiguration conf() { - return defaultConfiguration; - } - - @Override - public void setConf(NeuralNetConfiguration conf) { - throw new UnsupportedOperationException(); - } - @Override public INDArray input() { return input; @@ -498,14 +533,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Get one parameter array for the network.
In MultiLayerNetwork, parameters are keyed like * "0_W" and "0_b" to mean "weights of layer index 0" and "biases of layer index 0" respectively. - * Numbers increment sequentially, and the suffixes ("W", "b" etc) depend on the layer type, and + * Numbers increment sequentially, and the suffixes ("W", "b" etc.) depend on the layer type, and * are defined in the relevant parameter initializers for each layer.
Note that the returned * INDArrays are views of the underlying network parameters, so modifications of the returned * arrays will impact the parameters of the network. * * @param param the key of the parameter * @return The specified parameter array for the network - * @see #paramTable() paramTable() method, for a map of all parameters + * @see #getParamTable() getParamTable() method, for a map of all parameters */ @Override public INDArray getParam(String param) { @@ -521,20 +556,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return layers[layerIdx].getParam(newKey); } - /** - * Return a map of all parameters in the network. Parameter names are as described in - * {@link #getParam(String)}. As per {@link #getParam(String)} the returned arrays are views - - * modifications to these will impact the underlying network parameters - * - * @return A map of all parameters in the network - */ - @Override - public Map paramTable() { - return paramTable(false); - } /** - * Returns a map of all parameters in the network as per {@link #paramTable()}.
Optionally + * Returns a map of all parameters in the network as per {@link #getParamTable()}.
Optionally * (with backpropParamsOnly=true) only the 'backprop' parameters are returned - that is, any * parameters involved only in unsupervised layerwise pretraining not standard inference/backprop * are excluded from the returned list. @@ -546,7 +570,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial //Get all parameters from all layers Map allParams = new LinkedHashMap<>(); for (int i = 0; i < layers.length; i++) { - Map paramMap = layers[i].paramTable(backpropParamsOnly); + Map paramMap = layers[i].getParamTable(backpropParamsOnly); for (Map.Entry entry : paramMap.entrySet()) { String newKey = i + "_" + entry.getKey(); allParams.put(newKey, entry.getValue()); @@ -568,7 +592,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Set the parameters of the netowrk. Note that the parameter keys must match the format as - * described in {@link #getParam(String)} and {@link #paramTable()}. Note that the values of the + * described in {@link #getParam(String)} and {@link #getParamTable()}. Note that the values of the * parameters used as an argument to this method are copied - i.e., it is safe to later * modify/reuse the values in the provided paramTable without this impacting the network. * @@ -576,7 +600,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial */ @Override public void setParamTable(Map paramTable) { - Map currParamTable = paramTable(); + Map currParamTable = getParamTable(); if (!currParamTable.keySet().equals(paramTable.keySet())) { throw new IllegalArgumentException( "Cannot set param table: parameter keys do not match.\n" + "Current: " @@ -623,22 +647,6 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial layers[layerIdx].setParam(newKey, val); } - /** - * Get the configuration for the network - * - * @return Network configuration - */ - public MultiLayerConfiguration getLayerWiseConfigurations() { - return layerWiseConfigurations; - } - - /** - * This method is intended for internal/developer use only. - */ - public void setLayerWiseConfigurations(MultiLayerConfiguration layerWiseConfigurations) { - this.layerWiseConfigurations = layerWiseConfigurations; - } - /** * Initialize the MultiLayerNetwork. This should be called once before the network is used. This * is functionally equivalent to calling {@code init(null, false)}. @@ -660,20 +668,17 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * directly */ public void init(INDArray parameters, boolean cloneParametersArray) { - if (layerWiseConfigurations == null || layers == null) { - intializeConfigurations(); - } if (initCalled) { return; } - DataType netDtype = getLayerWiseConfigurations().getDataType(); + DataType netDtype = getNetConfiguration().getDataType(); if (parameters != null && parameters.dataType() != netDtype) { Preconditions.checkState(parameters.rank() == 2 && parameters.size(0) == 1, "Invalid parameters array: should be rank 2 with shape [1,numParams]. 
Got %ndShape", parameters); if (cloneParametersArray) { - try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { parameters = parameters.castTo(netDtype); } } else { @@ -685,29 +690,25 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } - if (layerMap == null) { - layerMap = new LinkedHashMap<>(); + if (getNetConfiguration().getTrainingWorkspaceMode() == null) { + getNetConfiguration().setTrainingWorkspaceMode(WorkspaceMode.NONE); } - if (layerWiseConfigurations.getTrainingWorkspaceMode() == null) { - layerWiseConfigurations.setTrainingWorkspaceMode(WorkspaceMode.NONE); + if (getNetConfiguration().getInferenceWorkspaceMode() == null) { + getNetConfiguration().setInferenceWorkspaceMode(WorkspaceMode.NONE); } - if (layerWiseConfigurations.getInferenceWorkspaceMode() == null) { - layerWiseConfigurations.setInferenceWorkspaceMode(WorkspaceMode.NONE); - } - - if (layerWiseConfigurations.getCacheMode() == null) { - layerWiseConfigurations.setCacheMode(CacheMode.NONE); + if (getNetConfiguration().getCacheMode() == null) { + getNetConfiguration().setCacheMode(CacheMode.NONE); } OneTimeLogger.info(log, "Starting MultiLayerNetwork with WorkspaceModes set to [training: {}; inference: {}], cacheMode set to [{}]", - layerWiseConfigurations.getTrainingWorkspaceMode(), - layerWiseConfigurations.getInferenceWorkspaceMode(), - layerWiseConfigurations.getCacheMode()); + getNetConfiguration().getTrainingWorkspaceMode(), + getNetConfiguration().getInferenceWorkspaceMode(), + getNetConfiguration().getCacheMode()); - int nLayers = getnLayers(); + int nLayers = getNetConfiguration().getFlattenedLayerConfigurations().size(); if (nLayers < 1) { throw new IllegalStateException("Unable to create network: number of layers is less than 1"); @@ -722,9 +723,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial long paramLength = 0; val nParamsPerLayer = new long[nLayers]; for (int i = 0; i < nLayers; i++) { - NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i); - conf.getLayer().setDataType(netDtype); - nParamsPerLayer[i] = conf.getLayer().initializer().numParams(conf); + LayerConfiguration layer_conf = getNetConfiguration().getFlattenedLayerConfigurations().get(i); + layer_conf.setDataType(netDtype); + nParamsPerLayer[i] = layer_conf.initializer().numParams(layer_conf); paramLength += nParamsPerLayer[i]; } @@ -757,7 +758,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial //Set RNG seed, for repeatability between initializations when set if (initializeParams) { - Nd4j.getRandom().setSeed(getDefaultConfiguration().getSeed()); + Nd4j.getRandom().setSeed(getNetConfiguration().getSeed()); } // construct multi-layer @@ -771,28 +772,27 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial paramsView = null; } paramCountSoFar += nParamsPerLayer[i]; - - NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i); - layers[i] = conf.getLayer() - .instantiate(conf, trainingListeners, i, paramsView, initializeParams, netDtype); - layerMap.put(conf.getLayer().getLayerName(), layers[i]); + @NonNull + LayerConfiguration lc = getNetConfiguration().getFlattenedLayerConfigurations().get(i); + layers[i] = lc.instantiate(lc.getNetConfiguration(), trainingListeners, i, paramsView, initializeParams, + netDtype); } initCalled = true; } - //Set parameters in 
MultiLayerNetwork.defaultConfiguration for later use in BaseOptimizer.setupSearchState() etc - defaultConfiguration.clearVariables(); - List variables = defaultConfiguration.variables(false); + //Set parameters in MultiLayerNetwork.getNetConfiguration() for later use in BaseOptimizer.setupSearchState() etc + getNetConfiguration().clearNetWideVariable(); + List variables = getNetConfiguration().netWideVariables(false); for (int i = 0; i < layers.length; i++) { if (layers[i] == null) { throw new IllegalStateException( "Encountered null layer during initialization for layer " + i + - ": " + layerWiseConfigurations.getConf(i).getLayer().getClass().getSimpleName() + ": " + getNetConfiguration().getFlattenedLayerConfigurations().get(i).getClass().getSimpleName() + " initialization " + "returned null layer?"); } - for (String s : layers[i].conf().variables()) { + for (String s : layers[i].getNetConfiguration().netWideVariables()) { variables.add(i + "_" + s); } } @@ -800,7 +800,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial // now we init solver & optimizer if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) .build(); solver.initOptimizer(); } @@ -832,7 +832,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) .build(); } } @@ -861,8 +861,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial long paramLength = 0; val nParamsPerLayer = new long[nLayers]; for (int i = 0; i < nLayers; i++) { - NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i); - nParamsPerLayer[i] = conf.getLayer().initializer().numParams(conf); + LayerConfiguration layerConfiguration = getNetConfiguration().getFlattenedLayerConfigurations().get(i); + nParamsPerLayer[i] = layerConfiguration.initializer().numParams(layerConfiguration); //TODO better initialisation paramLength += nParamsPerLayer[i]; } @@ -886,8 +886,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial protected INDArray activationFromPrevLayer(int curr, INDArray input, boolean training, LayerWorkspaceMgr mgr) { - if (getLayerWiseConfigurations().getInputPreProcess(curr) != null) { - input = getLayerWiseConfigurations().getInputPreProcess(curr) + if (getNetConfiguration().getInputPreProcess(curr) != null) { + input = getNetConfiguration().getInputPreProcess(curr) .preProcess(input, getInputMiniBatchSize(), mgr); } @@ -1060,10 +1060,10 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial try { mgr.validateArrayLocation(arrayType, array, false, layerIdx > 0); } catch (ND4JWorkspaceException e) { - String layerName = layers[layerIdx].conf().getLayer().getLayerName(); + String layerName = layers[layerIdx].getLayerConfiguration().getLayerName(); String clazz; if (isPreprocessor) { - clazz = layerWiseConfigurations.getInputPreProcess(layerIdx).getClass().getName(); + clazz = getNetConfiguration().getInputPreProcess(layerIdx).getClass().getName(); } else { clazz = layers[layerIdx].getClass().getName(); } @@ -1106,8 +1106,8 @@
public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial "Expected no workspace active in ffToLayerActivationsDetached"); LayerWorkspaceMgr workspaceMgr; - WorkspaceMode wsm = (train ? layerWiseConfigurations.getTrainingWorkspaceMode() - : layerWiseConfigurations.getInferenceWorkspaceMode()); + WorkspaceMode wsm = (train ? getNetConfiguration().getTrainingWorkspaceMode() + : getNetConfiguration().getInferenceWorkspaceMode()); if (wsm == WorkspaceMode.NONE) { workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { @@ -1137,8 +1137,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial for (int i = 0; i <= layerIndex; i++) { try (MemoryWorkspace wsFFWorking = workspaceMgr.notifyScopeEntered( ArrayType.FF_WORKING_MEM)) { - if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { - input = getLayerWiseConfigurations().getInputPreProcess(i) + if (getNetConfiguration().getInputPreProcess(i) != null) { + input = getNetConfiguration().getInputPreProcess(i) .preProcess(input, getInputMiniBatchSize(), workspaceMgr); //Validation: Exception if invalid (bad preprocessor implementation) validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, true, @@ -1207,7 +1207,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial setLayerMaskArrays(fMask, lMask); LayerWorkspaceMgr workspaceMgr; - if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { WorkspaceUtils.assertNoWorkspacesOpen( "Expected no workspace active in ffToLayerActivationsInWs when training workspace is set to NONE"); workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); @@ -1225,7 +1225,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial workspaceMgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); } - if (layerWiseConfigurations.getCacheMode() != CacheMode.NONE) { + if (getNetConfiguration().getCacheMode() != CacheMode.NONE) { //For now: store cache mode activations in activations workspace workspaceMgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); workspaceMgr.setWorkspace(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, @@ -1245,8 +1245,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial for (int i = 0; i <= layerIndex; i++) { try (MemoryWorkspace wsFFWorking = workspaceMgr.notifyScopeEntered( ArrayType.FF_WORKING_MEM)) { - if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { - input = getLayerWiseConfigurations().getInputPreProcess(i) + if (getNetConfiguration().getInputPreProcess(i) != null) { + input = getNetConfiguration().getInputPreProcess(i) .preProcess(input, getInputMiniBatchSize(), workspaceMgr); //Validation: Exception if invalid (bad preprocessor implementation) validateArrayWorkspaces(workspaceMgr, input, ArrayType.ACTIVATIONS, i, true, @@ -1280,7 +1280,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } if (input == null) { - throw new IllegalStateException("Layer " + i + " returned null activations"); + throw new IllegalStateException("LayerConfiguration " + i + " returned null activations"); } //Validation: Exception if invalid (bad layer implementation) @@ -1355,8 +1355,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial LayerWorkspaceMgr mgrEven; LayerWorkspaceMgr mgrOdd; - WorkspaceMode wsm = train ? 
layerWiseConfigurations.getTrainingWorkspaceMode() - : layerWiseConfigurations.getInferenceWorkspaceMode(); + WorkspaceMode wsm = train ? getNetConfiguration().getTrainingWorkspaceMode() + : getNetConfiguration().getInferenceWorkspaceMode(); if (wsm == WorkspaceMode.NONE) { mgrEven = LayerWorkspaceMgr.noWorkspaces(); mgrOdd = mgrEven; @@ -1368,7 +1368,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial : "inference") + " workspace mode is set to NONE. Cannot put output activations into the specified workspace if" + - "workspaces are disabled for the network. use getConfiguration().setTraining/InferenceWorkspaceMode(WorkspaceMode.ENABLED)"); + "workspaces are disabled for the network. use getNetConfiguration().setTraining/InferenceWorkspaceMode(WorkspaceMode.ENABLED)"); } } else { mgrEven = LayerWorkspaceMgr.builder() @@ -1430,8 +1430,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial mgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); } - if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { - input = getLayerWiseConfigurations().getInputPreProcess(i) + if (getNetConfiguration().getInputPreProcess(i) != null) { + input = getNetConfiguration().getInputPreProcess(i) .preProcess(input, getInputMiniBatchSize(), mgr); //Validation: Exception if invalid (bad preprocessor implementation) validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, true, @@ -1451,13 +1451,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (fwdPassType == FwdPassType.STANDARD) { //Standard feed-forward case - if (i > 0 && ConvolutionUtils.layerHasConvolutionLayout(layers[i - 1].conf().getLayer()) - && ConvolutionUtils.layerHasConvolutionLayout(layers[i].conf().getLayer())) { + if (i > 0 && ConvolutionUtils.layerHasConvolutionLayout( + layers[i - 1].getLayerConfiguration()) + && ConvolutionUtils.layerHasConvolutionLayout(layers[i].getLayerConfiguration())) { CNN2DFormat preLayerFormat = ConvolutionUtils.getFormatForLayer( - layers[i - 1].conf().getLayer()); + layers[i - 1].getLayerConfiguration()); CNN2DFormat currLayerFormat = ConvolutionUtils.getFormatForLayer( - layers[i].conf().getLayer()); + layers[i].getLayerConfiguration()); if (preLayerFormat != currLayerFormat) { //NHWC case if (preLayerFormat == CNN2DFormat.NCHW) { @@ -1474,12 +1475,13 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } input = layers[i].activate(input, train, mgr); - } else if (i > 0 && Convolution1DUtils.hasRnnDataFormat(layers[i - 1].conf().getLayer()) - && Convolution1DUtils.hasRnnDataFormat(layers[i].conf().getLayer())) { + } else if (i > 0 && Convolution1DUtils.hasRnnDataFormat( + layers[i - 1].getLayerConfiguration()) + && Convolution1DUtils.hasRnnDataFormat(layers[i].getLayerConfiguration())) { RNNFormat preLayerFormat = Convolution1DUtils.getRnnFormatFromLayer( - layers[i - 1].conf().getLayer()); + layers[i - 1].getLayerConfiguration()); RNNFormat currLayerFormat = Convolution1DUtils.getRnnFormatFromLayer( - layers[i].conf().getLayer()); + layers[i].getLayerConfiguration()); //permute for next layer if (preLayerFormat != currLayerFormat) { input = input.permute(0, 2, 1); @@ -1653,7 +1655,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (!initCalled) { init(); } - MultiLayerConfiguration conf = this.layerWiseConfigurations.clone(); + NeuralNetConfiguration conf = this.getNetConfiguration().clone(); MultiLayerNetwork ret = 
new MultiLayerNetwork(conf); ret.init(this.params().dup(), false); @@ -1698,7 +1700,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial /** * Returns a 1 x m vector where the vector is composed of a flattened vector of all of the - * parameters in the network.
See {@link #getParam(String)} and {@link #paramTable()} for a + * parameters in the network.
See {@link #getParam(String)} and {@link #getParamTable()} for a * more useful/interpretable representation of the parameters.
Note that the parameter vector * is not a copy, and changes to the returned INDArray will impact the network parameters. * @@ -1709,6 +1711,28 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return flattenedParams; } + /** + * The param table + * + * @return + */ + @Override + public Map getParamTable() { + return null; + } + + /** + * Table of parameters by key, for backprop. For many models (dense layers, etc) - all parameters + * are backprop parameters + * + * @param backpropParamsOnly If true, return backprop params only. If false: return all params + * (equivalent to paramsTable()) + */ + @Override + public Map getParamTable(boolean backpropParamsOnly) { + return null; + } + /** * Set the parameters for this model. This expects a linear ndarray which then be unpacked * internally relative to the expected ordering of the model.
See also: @@ -1868,7 +1892,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } LayerWorkspaceMgr workspaceMgr; - if (getLayerWiseConfigurations().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { workspaceMgr = LayerWorkspaceMgr.builder() @@ -1908,7 +1932,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial boolean hasMaskArrays = next.hasMaskArrays(); - if (layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT) { + if (getNetConfiguration().getBackpropType() == BackpropType.TruncatedBPTT) { doTruncatedBPTT(next.getFeatures(), next.getLabels(), next.getFeaturesMaskArray(), next.getLabelsMaskArray(), workspaceMgr); } else { @@ -1921,7 +1945,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) .build(); } } @@ -1983,7 +2007,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial setLayerMaskArrays(fMask, labelMask); LayerWorkspaceMgr mgr; - if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { mgr = LayerWorkspaceMgr.noWorkspaces(); } else { mgr = LayerWorkspaceMgr.builder() @@ -1997,7 +2021,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial WS_RNN_LOOP_WORKING_MEM_CONFIG) .build(); - if (layerWiseConfigurations.getCacheMode() != null) { + if (getNetConfiguration().getCacheMode() != null) { //For now: store cache mode activations in activations workspace mgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); } @@ -2018,8 +2042,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } INDArray inputToOutputLayer = activations.get(activations.size() - 1); - if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null) { - inputToOutputLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1) + if (getNetConfiguration().getInputPreProcess(layers.length - 1) != null) { + inputToOutputLayer = getNetConfiguration().getInputPreProcess(layers.length - 1) .preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr); //Validate activations location } @@ -2059,7 +2083,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial LayerWorkspaceMgr mgrEven; LayerWorkspaceMgr mgrOdd; - if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { mgrEven = LayerWorkspaceMgr.noWorkspaces(); mgrOdd = mgrEven; WorkspaceUtils.assertNoWorkspacesOpen( @@ -2188,7 +2212,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial //TBPTT gradient if (layers[i] instanceof RecurrentLayer) { currPair = ((RecurrentLayer) layers[i]).tbpttBackpropGradient(currPair.getSecond(), - layerWiseConfigurations.getTbpttBackLength(), workspaceMgr); + getNetConfiguration().getTbpttBackLength(), workspaceMgr); } else { currPair = layers[i].backpropGradient(currPair.getSecond(), workspaceMgr); } 
@@ -2208,9 +2232,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial gradientList.addLast(new Triple<>(multiGradientKey, entry.getValue(), currPair.getFirst().flatteningOrderForVariable(origName))); } - if (getLayerWiseConfigurations().getInputPreProcess(i) != null) { + if (getNetConfiguration().getInputPreProcess(i) != null) { currPair = new Pair<>(currPair.getFirst(), - this.layerWiseConfigurations.getInputPreProcess(i) + this.getNetConfiguration().getInputPreProcess(i) .backprop(currPair.getSecond(), getInputMiniBatchSize(), workspaceMgr)); if (i > 0 && currPair.getSecond() != null) { validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, @@ -2276,7 +2300,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } - if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { WorkspaceUtils.assertNoWorkspacesOpen( "Expected no workspace active in calcBackpropGradients when " + "training workspace is set to none"); @@ -2313,7 +2337,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return; } - int fwdLen = layerWiseConfigurations.getTbpttFwdLength(); + int fwdLen = getNetConfiguration().getTbpttFwdLength(); update(TaskUtils.buildTask(input, labels)); val timeSeriesLength = input.size(2); long nSubsets = timeSeriesLength / fwdLen; @@ -2342,7 +2366,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) .build(); } } @@ -2401,7 +2425,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } @Override - public void setListeners(Collection listeners) { + public void setListeners(TrainingListener... listeners) { if (layers == null) { init(); } @@ -2410,30 +2434,15 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } if (solver != null) { - solver.setListeners(listeners); + //Guard against a null varargs array (e.g. setListeners(null)), which List.of() rejects + if (listeners != null) { + solver.setListeners(List.of(listeners)); + } } this.trainingListeners.clear(); if (listeners != null) { - this.trainingListeners.addAll(listeners); + this.trainingListeners.addAll(List.of(listeners)); } } - @Override - public void setListeners(TrainingListener... listeners) { - Collection cListeners = new ArrayList<>(); - //Check: user might have done setListeners(null) thinking this would clear the current listeners.
- //This results in an TrainingListener[1] with a single null value -> results in a NPE later - if (listeners != null && listeners.length > 0) { - for (TrainingListener i : listeners) { - if (i != null) { - cListeners.add(i); - } - } - } - setListeners(cListeners); - } - /** * @deprecated Use {@link #getListeners()} */ @@ -2542,7 +2551,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial update(TaskUtils.buildTask(features, labels)); LayerWorkspaceMgr workspaceMgr; - if (layerWiseConfigurations.getTrainingWorkspaceMode() == null) { + if (getNetConfiguration().getTrainingWorkspaceMode() == null) { workspaceMgr = LayerWorkspaceMgr.noWorkspaces(); } else { workspaceMgr = LayerWorkspaceMgr.builder() @@ -2556,12 +2565,12 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } workspaceMgr.setHelperWorkspacePointers(helperWorkspaces); - if (layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT) { + if (getNetConfiguration().getBackpropType() == BackpropType.TruncatedBPTT) { doTruncatedBPTT(features, labels, featuresMask, labelsMask, workspaceMgr); } else { if (solver == null) { try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) .build(); } } @@ -2599,7 +2608,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial @Override public void fit(INDArray examples, int[] labels) { org.deeplearning4j.nn.conf.layers.OutputLayer layerConf = - (org.deeplearning4j.nn.conf.layers.OutputLayer) getOutputLayer().conf().getLayer(); + (org.deeplearning4j.nn.conf.layers.OutputLayer) getOutputLayer().getLayerConfiguration(); if (layerConf.getNOut() > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); @@ -2861,8 +2870,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial "Final layer is of type: " + getOutputLayer().getClass()); } - WorkspaceMode wsm = (training ? layerWiseConfigurations.getTrainingWorkspaceMode() - : layerWiseConfigurations.getInferenceWorkspaceMode()); + WorkspaceMode wsm = (training ? 
getNetConfiguration().getTrainingWorkspaceMode() + : getNetConfiguration().getInferenceWorkspaceMode()); LayerWorkspaceMgr mgr; if (wsm == WorkspaceMode.NONE) { mgr = LayerWorkspaceMgr.noWorkspaces(); @@ -2886,8 +2895,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial throw new ND4JArraySizeException(); } IOutputLayer ol = (IOutputLayer) getOutputLayer(); - if (getLayerWiseConfigurations().getInputPreProcess(layers.length - 1) != null) { - inputToOutputLayer = getLayerWiseConfigurations().getInputPreProcess(layers.length - 1) + if (getNetConfiguration().getInputPreProcess(layers.length - 1) != null) { + inputToOutputLayer = getNetConfiguration().getInputPreProcess(layers.length - 1) .preProcess(inputToOutputLayer, (int) data.getFeatures().size(0), mgr); } ol.setInput(inputToOutputLayer, mgr); //Feedforward doesn't include output layer for efficiency @@ -2953,12 +2962,12 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial INDArray out; if (getOutputLayer() instanceof IOutputLayer) { IOutputLayer ol = (IOutputLayer) getOutputLayer(); - if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null) { + if (getNetConfiguration().getInputPreProcess(layers.length - 1) != null) { if (data.getFeatures().size(0) > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); } - inputLast = layerWiseConfigurations.getInputPreProcess(layers.length - 1) + inputLast = getNetConfiguration().getInputPreProcess(layers.length - 1) .preProcess(inputLast, (int) data.getFeatures().size(0), mgr); } @@ -3023,7 +3032,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial //Note: Workspace manager is only ose here for score calculation... other workspace managers are used in the // various FF/backprop methds LayerWorkspaceMgr mgr; - if (layerWiseConfigurations.getTrainingWorkspaceMode() == WorkspaceMode.NONE) { + if (getNetConfiguration().getTrainingWorkspaceMode() == WorkspaceMode.NONE) { mgr = LayerWorkspaceMgr.noWorkspaces(); } else { mgr = LayerWorkspaceMgr.builder() @@ -3037,13 +3046,13 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial WS_RNN_LOOP_WORKING_MEM_CONFIG) .build(); - if (layerWiseConfigurations.getCacheMode() != null) { + if (getNetConfiguration().getCacheMode() != null) { //For now: store cache mode activations in activations workspace mgr.setWorkspace(ArrayType.FF_CACHE, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG); } } - boolean tbptt = layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT; + boolean tbptt = getNetConfiguration().getBackpropType() == BackpropType.TruncatedBPTT; FwdPassType fwdType = (tbptt ? 
FwdPassType.RNN_ACTIVATE_WITH_STORED_STATE : FwdPassType.STANDARD); synchronizeIterEpochCounts(); @@ -3062,8 +3071,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } } INDArray inputToOutputLayer = activations.get(activations.size() - 1); - if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null) { - inputToOutputLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1) + if (getNetConfiguration().getInputPreProcess(layers.length - 1) != null) { + inputToOutputLayer = getNetConfiguration().getInputPreProcess(layers.length - 1) .preProcess(inputToOutputLayer, getInputMiniBatchSize(), mgr); //Validate activations location } @@ -3138,12 +3147,6 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial setParams(params); } - /** - * Intended for internal/developer use - */ - public NeuralNetConfiguration getDefaultConfiguration() { - return defaultConfiguration; - } public INDArray getLabels() { return labels; @@ -3189,7 +3192,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * @return the number of layers in the network */ public int getnLayers() { - return layerWiseConfigurations.getConfs().size(); + return getNetConfiguration().getFlattenedLayerConfigurations().size(); } /** @@ -3210,12 +3213,17 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial return layers[i]; } - public Layer getLayer(String name) { - return layerMap.get(name); + public Layer getLayer(@NotNull String name) { + return Arrays.stream(layers) + .filter(l -> name.equals(l.getLayerConfiguration().getLayerName())) + .findFirst() + .orElse(null); } public List getLayerNames() { - return new ArrayList<>(layerMap.keySet()); + return Arrays.stream(layers) + .map(l -> l.getLayerConfiguration().getLayerName()) + .collect(Collectors.toList()); } public INDArray getMask() { return mask; @@ -3253,7 +3261,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } //========== - //Layer methods + //LayerConfiguration methods @Override public Pair feedForwardMaskArray(INDArray maskArray, @@ -3266,7 +3274,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } else { //Do a forward pass through each preprocessor and layer for (int i = 0; i < layers.length; i++) { - InputPreProcessor preProcessor = getLayerWiseConfigurations().getInputPreProcess(i); + InputPreProcessor preProcessor = getNetConfiguration().getInputPreProcess(i); if (preProcessor != null) { Pair p = @@ -3342,22 +3350,22 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial @Override public int getIterationCount() { - return getLayerWiseConfigurations().getIterationCount(); + return getNetConfiguration().getIterationCount(); } @Override public void setIterationCount(int iterationCount) { - getLayerWiseConfigurations().setIterationCount(iterationCount); + getNetConfiguration().setIterationCount(iterationCount); } @Override public int getEpochCount() { - return getLayerWiseConfigurations().getEpochCount(); + return getNetConfiguration().getEpochCount(); } @Override public void setEpochCount(int epochCount) { - getLayerWiseConfigurations().setEpochCount(epochCount); + getNetConfiguration().setEpochCount(epochCount); } @Override @@ -3407,7 +3415,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial @Override public int getInputMiniBatchSize() { - if (!conf().isMiniBatch()) { + if (!getNetConfiguration().isMiniBatch()) {
return 1; } @@ -3498,7 +3506,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial l = ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) l).getUnderlying(); } if (!(l instanceof RecurrentLayer)) { - throw new IllegalArgumentException("Layer is not an RNN layer"); + throw new IllegalArgumentException("LayerConfiguration is not an RNN layer"); } return ((RecurrentLayer) l).rnnGetPreviousState(); } @@ -3518,7 +3526,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial l = ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) l).getUnderlying(); } if (!(l instanceof RecurrentLayer)) { - throw new IllegalArgumentException("Layer is not an RNN layer"); + throw new IllegalArgumentException("LayerConfiguration is not an RNN layer"); } RecurrentLayer r = (RecurrentLayer) l; r.rnnSetPreviousState(state); @@ -3575,7 +3583,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial */ public void setUpdater(Updater updater) { if (solver == null) { - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build(); + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this).build(); } solver.getOptimizer().setUpdater(updater); } @@ -3584,7 +3592,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (solver == null && initializeIfReq) { synchronized (this) { if (solver == null) { //May have been created while waiting for lock - solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this) + solver = new Solver.Builder().configure(getNetConfiguration()).listeners(getListeners()).model(this) .build(); solver.getOptimizer().setUpdater(UpdaterCreator.getUpdater(this)); } @@ -3605,9 +3613,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * [miniBatchSize,timeSeriesLength] and contain values 0 or 1 at each element (to specify whether * a given input/example is present - or merely padding - at a given time step).
* NOTE: This method is not usually used directly. Instead, methods such as - * {@link #feedForward(INDArray, INDArray, INDArray)} - * and {@link #output(INDArray, boolean, INDArray, INDArray)} handle setting of masking - * internally. + * {@link #feedForward(INDArray, INDArray, INDArray)} and + * {@link #output(INDArray, boolean, INDArray, INDArray)} handle setting of masking internally. * * @param featuresMaskArray Mask array for features (input) * @param labelsMaskArray Mask array for labels (output) @@ -3723,8 +3730,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial */ public T evaluateROC(DataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(); - if (getLayerWiseConfigurations().isValidateOutputLayerConfig()) { - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), + if (getNetConfiguration().isValidateOutputLayerConfig()) { + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), ROC.class); } return (T) doEvaluation(iterator, new org.deeplearning4j.eval.ROC(rocThresholdSteps))[0]; @@ -3749,8 +3756,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial public T evaluateROCMultiClass(DataSetIterator iterator, int rocThresholdSteps) { Layer outputLayer = getOutputLayer(); - if (getLayerWiseConfigurations().isValidateOutputLayerConfig()) { - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), + if (getNetConfiguration().isValidateOutputLayerConfig()) { + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), ROCMultiClass.class); } return (T) doEvaluation(iterator, @@ -3780,9 +3787,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial DataSetIterator iter = iterator.asyncSupported() ? new AsyncDataSetIterator(iterator, 2, true) : iterator; - WorkspaceMode cMode = layerWiseConfigurations.getTrainingWorkspaceMode(); - layerWiseConfigurations.setTrainingWorkspaceMode( - layerWiseConfigurations.getInferenceWorkspaceMode()); + WorkspaceMode cMode = getNetConfiguration().getTrainingWorkspaceMode(); + getNetConfiguration().setTrainingWorkspaceMode( + getNetConfiguration().getInferenceWorkspaceMode()); //First: let's determine if we should do 'split feed forward' for long time series //The idea: RNN 20k time steps. Train using TBPTT length 100 -> 200 segments of length 100. If we naively @@ -3790,11 +3797,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial // evaluation in segments... 
//Only do this if TBPTT is enabled - if not, it means we can train without TBPTT and hence should be able // to test without splitting also - boolean useRnnSegments = (layerWiseConfigurations.getBackpropType() + boolean useRnnSegments = (getNetConfiguration().getBackpropType() == BackpropType.TruncatedBPTT); MemoryWorkspace outputWs; - if (getLayerWiseConfigurations().getInferenceWorkspaceMode() == WorkspaceMode.ENABLED) { + if (getNetConfiguration().getInferenceWorkspaceMode() == WorkspaceMode.ENABLED) { outputWs = Nd4j.getWorkspaceManager() .getWorkspaceForCurrentThread(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM); } else { @@ -3830,7 +3837,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial rnnClearPreviousState(); //Get subset of features and labels: - val fwdLen = layerWiseConfigurations.getTbpttFwdLength(); + val fwdLen = getNetConfiguration().getTbpttFwdLength(); val tsLength = features.size(2); long nSubsets = tsLength / fwdLen; if (tsLength % fwdLen != 0) { @@ -3867,7 +3874,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial ((AsyncDataSetIterator) iter).shutdown(); } - layerWiseConfigurations.setTrainingWorkspaceMode(cMode); + getNetConfiguration().setTrainingWorkspaceMode(cMode); return evaluations; } @@ -3973,8 +3980,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } Layer outputLayer = getOutputLayer(); - if (getLayerWiseConfigurations().isValidateOutputLayerConfig()) { - OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.conf().getLayer(), + if (getNetConfiguration().isValidateOutputLayerConfig()) { + OutputLayerUtil.validateOutputLayerForClassifierEvaluation(outputLayer.getLayerConfiguration(), Evaluation.class); } @@ -4034,7 +4041,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial int frozenParams = 0; for (org.deeplearning4j.nn.api.Layer currentLayer : getLayers()) { - String name = currentLayer.conf().getLayer().getLayerName(); + String name = currentLayer.getLayerConfiguration().getLayerName(); if (name == null) { name = String.valueOf(currentLayer.getIndex()); } @@ -4049,13 +4056,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial InputPreProcessor preProcessor; InputType outType; if (inputType != null) { - preProcessor = getLayerWiseConfigurations().getInputPreProcess(currentLayer.getIndex()); + preProcessor = getNetConfiguration().getInputPreProcess(currentLayer.getIndex()); inShape = inputType.toString(); if (preProcessor != null) { inputType = preProcessor.getOutputType(inputType); inShape += "--> " + inputType.toString(); } - outType = currentLayer.conf().getLayer().getOutputType(currentLayer.getIndex(), inputType); + outType = currentLayer.getLayerConfiguration() + .getOutputType(currentLayer.getIndex(), inputType); outShape = outType.toString(); inputType = outType; } @@ -4063,19 +4071,20 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial paramShape = ""; if (currentLayer instanceof BidirectionalLayer) { // Bidirectional layer is not an FFL BidirectionalLayer bi = (BidirectionalLayer) currentLayer; - in = String.valueOf(((Bidirectional) bi.conf().getLayer()).getNIn()); - out = String.valueOf(((Bidirectional) bi.conf().getLayer()).getNOut()); + in = String.valueOf(((Bidirectional) bi.getLayerConfiguration()).getNIn()); + out = String.valueOf(((Bidirectional) bi.getLayerConfiguration()).getNOut()); } else { try { - in = 
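The segment arithmetic referenced in the comment above (20k time steps, TBPTT length 100, hence 200 segments) reduces to the integer division performed a few lines below; a standalone sketch with those illustrative values:

    public class TbpttSegmentCount {
        public static void main(String[] args) {
            long tsLength = 20_000;   // total time steps in the series
            long fwdLen = 100;        // tbpttFwdLength
            long nSubsets = tsLength / fwdLen;
            if (tsLength % fwdLen != 0) {
                nSubsets++;           // a final, shorter segment covers the remainder
            }
            System.out.println(nSubsets); // 200 segments of length 100
        }
    }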
String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNIn()); - out = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNOut()); + in = String.valueOf(((FeedForwardLayer) currentLayer.getLayerConfiguration()).getNIn()); + out = String.valueOf( + ((FeedForwardLayer) currentLayer.getLayerConfiguration()).getNOut()); } catch ( Exception e) { // Some layers, like PReLU, are just BaseLayers (but have parameters) } } - Set paraNames = currentLayer.paramTable().keySet(); + Set paraNames = currentLayer.getParamTable().keySet(); for (String aP : paraNames) { - String paramS = ArrayUtils.toString(currentLayer.paramTable().get(aP).shape()); + String paramS = ArrayUtils.toString(currentLayer.getParamTable().get(aP).shape()); paramShape += aP + ":" + paramS + ", "; } paramShape = paramShape.subSequence(0, paramShape.lastIndexOf(",")).toString(); @@ -4168,7 +4177,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial } /** - * Increment the epoch count (in the underlying {@link MultiLayerConfiguration} by 1). Note that + * Increment the epoch count (in the underlying {@link NeuralNetConfiguration} by 1). Note that * this is done automatically when using iterator-based fitting methods, such as * {@link #fit(DataSetIterator)}. However, when using non-iterator fit methods (DataSet, * INDArray/INDArray etc), the network has no way to know when one epoch ends and another starts. @@ -4176,10 +4185,10 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * epoch counter is used for situations such as some learning rate schedules, and the like. *

* The current epoch count can be obtained using - * {@code MultiLayerConfiguration.getLayerwiseConfiguration().getEpochCount()} + * {@code NeuralNetConfiguration.getLayerwiseConfiguration().getEpochCount()} */ public void incrementEpochCount() { - layerWiseConfigurations.setEpochCount(layerWiseConfigurations.getEpochCount() + 1); + getNetConfiguration().setEpochCount(getNetConfiguration().getEpochCount() + 1); synchronizeIterEpochCounts(); } @@ -4246,8 +4255,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial try (MemoryWorkspace ws = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { INDArray newParams = params().castTo(dataType); - String jsonConfig = getLayerWiseConfigurations().toJson(); - MultiLayerConfiguration newConf = MultiLayerConfiguration.fromJson(jsonConfig); + String jsonConfig = getNetConfiguration().toJson(); + NeuralNetConfiguration newConf = NeuralNetConfiguration.fromJson(jsonConfig); newConf.setDataType(dataType); MultiLayerNetwork newNet = new MultiLayerNetwork(newConf); newNet.init(newParams, false); @@ -4267,8 +4276,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * (fixed) learning rate.
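As the javadoc above notes, DataSet/INDArray-based fitting does not advance the epoch counter automatically. A minimal sketch of the manual pattern, assuming a DL4J-style classpath; the helper name and epoch count are illustrative:

    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.nd4j.linalg.dataset.DataSet;

    public class ManualEpochSketch {
        // Hypothetical helper: fit from an in-memory DataSet and keep the epoch counter in sync.
        static void fitForEpochs(MultiLayerNetwork net, DataSet data, int nEpochs) {
            for (int i = 0; i < nEpochs; i++) {
                net.fit(data);              // no iterator, so the epoch count is not advanced automatically
                net.incrementEpochCount();  // advance it so epoch-based LR schedules see the correct epoch
            }
        }
    }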
*
* Note: This method not free from a performance point of view: a proper learning - * rate schedule - * should be used in preference to calling this method at every iteration. + * rate schedule should be used in preference to calling this method at every iteration. * * @param newLr New learning rate for all layers * @see #setLearningRate(ISchedule) @@ -4282,8 +4290,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * Set the learning rate schedule for all layers in the network to the specified schedule. This * schedule will replace any/all existing schedules, and also any fixed learning rate values.
* Note that the iteration/epoch counts will not be reset. Use - * {@link MultiLayerConfiguration#setIterationCount(int)} and - * {@link MultiLayerConfiguration#setEpochCount(int)} if this is required + * {@link NeuralNetConfiguration#setIterationCount(int)} and + * {@link NeuralNetConfiguration#setEpochCount(int)} if this is required * * @param newLr New learning rate schedule for all layers * @see #setLearningRate(ISchedule) @@ -4299,10 +4307,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * (fixed) learning rate.
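A compact usage sketch for the two fixed-rate setters documented in this region (all layers, and a single layer by index); it assumes an initialized network, and the values are illustrative:

    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;

    public class LearningRateSketch {
        // Hypothetical helper showing both fixed-LR variants.
        static void retune(MultiLayerNetwork net) {
            net.setLearningRate(1e-3);     // all layers: replaces any existing schedule with a fixed rate
            net.setLearningRate(0, 1e-4);  // layer 0 only; prefer the all-layer call when every layer changes
        }
    }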
*
* Note: This method not free from a performance point of view: a proper learning - * rate schedule - * should be used in preference to calling this method at every iteration. Note also that - * {@link #setLearningRate(double)} should also be used in preference, when all layers need to be - * set to a new LR + * rate schedule should be used in preference to calling this method at every iteration. Note also + * that {@link #setLearningRate(double)} should also be used in preference, when all layers need + * to be set to a new LR * * @param layerNumber Number of the layer to set the LR for * @param newLr New learning rate for a single layer @@ -4318,8 +4325,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * Note also that {@link #setLearningRate(ISchedule)} should also be used in preference, when all * layers need to be set to a new LR schedule.
This schedule will replace any/all existing * schedules, and also any fixed learning rate values.
Note also that the iteration/epoch - * counts will not be reset. Use {@link MultiLayerConfiguration#setIterationCount(int)} and - * {@link MultiLayerConfiguration#setEpochCount(int)} if this is required + * counts will not be reset. Use {@link NeuralNetConfiguration#setIterationCount(int)} and + * {@link NeuralNetConfiguration#setEpochCount(int)} if this is required * * @param layerNumber Number of the layer to set the LR schedule for * @param newLr New learning rate for a single layer @@ -4335,7 +4342,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial * has no learning rate (no parameters, or an updater without a learning rate) then null is * returned * - * @param layerNumber Layer number to get the learning rate for + * @param layerNumber LayerConfiguration number to get the learning rate for * @return Learning rate for the specified layer, or null */ public Double getLearningRate(int layerNumber) { @@ -4355,10 +4362,10 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial public int layerSize(int layer) { if (layer < 0 || layer > layers.length) { throw new IllegalArgumentException( - "Invalid layer index: " + layer + ". Layer index must be between 0 and " + "Invalid layer index: " + layer + ". LayerConfiguration index must be between 0 and " + (layers.length - 1) + " inclusive"); } - org.deeplearning4j.nn.conf.layers.Layer conf = layers[layer].conf().getLayer(); + LayerConfiguration conf = layers[layer].getLayerConfiguration(); if (conf == null || !(conf instanceof FeedForwardLayer)) { return 0; } @@ -4384,10 +4391,10 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial public int layerInputSize(int layer) { if (layer < 0 || layer > layers.length) { throw new IllegalArgumentException( - "Invalid layer index: " + layer + ". Layer index must be between 0 and " + "Invalid layer index: " + layer + ". 
LayerConfiguration index must be between 0 and " + (layers.length - 1) + " inclusive"); } - org.deeplearning4j.nn.conf.layers.Layer conf = layers[layer].conf().getLayer(); + LayerConfiguration conf = layers[layer].getLayerConfiguration(); if (conf == null || !(conf instanceof FeedForwardLayer)) { return 0; } @@ -4451,8 +4458,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial if (obj instanceof MultiLayerNetwork) { MultiLayerNetwork network = (MultiLayerNetwork) obj; boolean paramsEquals = network.params().equals(params()); - boolean confEquals = getLayerWiseConfigurations().equals( - network.getLayerWiseConfigurations()); + boolean confEquals = getNetConfiguration().equals( + network.getNetConfiguration()); boolean updaterEquals = getUpdater().equals(network.getUpdater()); return paramsEquals && confEquals && updaterEquals; } @@ -4466,15 +4473,15 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException { val mln = ModelSerializer.restoreMultiLayerNetwork(ois, true); - this.defaultConfiguration = mln.defaultConfiguration.clone(); - this.layerWiseConfigurations = mln.layerWiseConfigurations.clone(); + this.setNetConfiguration( mln.getNetConfiguration().clone() ); this.init(); this.flattenedParams.assign(mln.flattenedParams); - int numWorkingMem = 2 * (layerWiseConfigurations.getConfs().size() - + layerWiseConfigurations.getInputPreProcessors().size()); + int numWorkingMem = 2 * (getNetConfiguration().getFlattenedLayerConfigurations().size() + + getNetConfiguration().getInputPreProcessors().size()); WS_LAYER_WORKING_MEM_CONFIG = getLayerWorkingMemWSConfig(numWorkingMem); - WS_LAYER_ACT_X_CONFIG = getLayerActivationWSConfig(layerWiseConfigurations.getConfs().size()); + WS_LAYER_ACT_X_CONFIG = getLayerActivationWSConfig( + getNetConfiguration().getFlattenedLayerConfigurations().size()); if (mln.getUpdater() != null && mln.getUpdater(false).getStateViewArray() != null) { this.getUpdater(true).getStateViewArray().assign(mln.getUpdater(false).getStateViewArray()); @@ -4508,4 +4515,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serial Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); System.gc(); } + + /** + * Returns a string representation of the underlying configuration. + * + * @return a string representation of the configuration. 
+ */ + @Override + public String toString() { + return getNetConfiguration().toString(); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java index 5215e2276..c68403835 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java @@ -21,16 +21,15 @@ package org.deeplearning4j.nn.params; import lombok.val; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.BatchNormalization; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; import java.util.*; -public class BatchNormalizationParamInitializer implements ParamInitializer { +public class BatchNormalizationParamInitializer extends AbstractParamInitializer { private static final BatchNormalizationParamInitializer INSTANCE = new BatchNormalizationParamInitializer(); @@ -45,12 +44,7 @@ public class BatchNormalizationParamInitializer implements ParamInitializer { public static final String GLOBAL_LOG_STD = "log10stdev"; @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer l) { + public long numParams(LayerConfiguration l) { BatchNormalization layer = (BatchNormalization) l; //Parameters in batch norm: //gamma, beta, global mean estimate, global variance estimate @@ -66,7 +60,7 @@ public class BatchNormalizationParamInitializer implements ParamInitializer { } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { if(((BatchNormalization)layer).isUseLogStd()){ return Arrays.asList(GAMMA, BETA, GLOBAL_MEAN, GLOBAL_LOG_STD); } else { @@ -75,30 +69,30 @@ public class BatchNormalizationParamInitializer implements ParamInitializer { } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { return Collections.emptyList(); } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { return Collections.emptyList(); } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return false; } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return false; } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramView, boolean initializeParams) { + public Map init(LayerConfiguration conf, INDArray paramView, boolean initializeParams) { Map params = Collections.synchronizedMap(new LinkedHashMap()); // TODO setup for RNN - BatchNormalization layer = (BatchNormalization) conf.getLayer(); + BatchNormalization layer = (BatchNormalization) conf; val nOut = layer.getNOut(); long meanOffset = 0; @@ -107,9 +101,9 @@ public class BatchNormalizationParamInitializer implements ParamInitializer { INDArray betaView = paramView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, 2 * nOut)); params.put(GAMMA, 
createGamma(conf, gammaView, initializeParams)); - conf.addVariable(GAMMA); + conf.getNetConfiguration().addNetWideVariable(GAMMA); params.put(BETA, createBeta(conf, betaView, initializeParams)); - conf.addVariable(BETA); + conf.getNetConfiguration().addNetWideVariable(BETA); meanOffset = 2 * nOut; } @@ -131,21 +125,21 @@ public class BatchNormalizationParamInitializer implements ParamInitializer { } params.put(GLOBAL_MEAN, globalMeanView); - conf.addVariable(GLOBAL_MEAN); + conf.getNetConfiguration().addNetWideVariable(GLOBAL_MEAN); if(layer.isUseLogStd()){ params.put(GLOBAL_LOG_STD, globalVarView); - conf.addVariable(GLOBAL_LOG_STD); + conf.getNetConfiguration().addNetWideVariable(GLOBAL_LOG_STD); } else { params.put(GLOBAL_VAR, globalVarView); - conf.addVariable(GLOBAL_VAR); + conf.getNetConfiguration().addNetWideVariable(GLOBAL_VAR); } return params; } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { - BatchNormalization layer = (BatchNormalization) conf.getLayer(); + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { + BatchNormalization layer = (BatchNormalization) conf; val nOut = layer.getNOut(); Map out = new LinkedHashMap<>(); @@ -171,15 +165,15 @@ public class BatchNormalizationParamInitializer implements ParamInitializer { return out; } - private INDArray createBeta(NeuralNetConfiguration conf, INDArray betaView, boolean initializeParams) { - BatchNormalization layer = (BatchNormalization) conf.getLayer(); + private INDArray createBeta(LayerConfiguration conf, INDArray betaView, boolean initializeParams) { + BatchNormalization layer = (BatchNormalization) conf; if (initializeParams) betaView.assign(layer.getBeta()); return betaView; } - private INDArray createGamma(NeuralNetConfiguration conf, INDArray gammaView, boolean initializeParams) { - BatchNormalization layer = (BatchNormalization) conf.getLayer(); + private INDArray createGamma(LayerConfiguration conf, INDArray gammaView, boolean initializeParams) { + BatchNormalization layer = (BatchNormalization) conf; if (initializeParams) gammaView.assign(layer.getGamma()); return gammaView; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BidirectionalParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BidirectionalParamInitializer.java index a75128790..27905d60f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BidirectionalParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BidirectionalParamInitializer.java @@ -21,12 +21,10 @@ package org.deeplearning4j.nn.params; import lombok.val; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.BaseLayer; -import org.deeplearning4j.nn.conf.layers.BaseRecurrentLayer; -import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; import org.nd4j.linalg.api.ndarray.INDArray; @@ -36,14 +34,13 @@ import java.util.List; import java.util.Map; import static org.nd4j.linalg.indexing.NDArrayIndex.interval; -import static org.nd4j.linalg.indexing.NDArrayIndex.point; -public class BidirectionalParamInitializer implements ParamInitializer { +public 
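Per the comment in numParams above, batch normalization stores gamma, beta, a global mean estimate, and a global variance (or log10 std-dev) estimate, each of length nOut. Assuming gamma/beta are trainable (the branch that sets meanOffset above), a worked count for an illustrative channel size:

    public class BatchNormParamLayout {
        public static void main(String[] args) {
            long nOut = 64;                      // illustrative number of channels
            long gamma = nOut, beta = nOut;      // learned scale and shift
            long mean = nOut, variance = nOut;   // global mean and variance (or log-std) estimates
            // Flattened view layout used above: [gamma | beta | mean | variance]
            System.out.println(gamma + beta + mean + variance); // 256 parameters
        }
    }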
class BidirectionalParamInitializer extends AbstractParamInitializer { public static final String FORWARD_PREFIX = "f"; public static final String BACKWARD_PREFIX = "b"; private final Bidirectional layer; - private final Layer underlying; + private final LayerConfiguration underlying; private List paramKeys; private List weightKeys; @@ -55,19 +52,14 @@ public class BidirectionalParamInitializer implements ParamInitializer { } @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer layer) { + public long numParams(LayerConfiguration layer) { return 2 * underlying(layer).initializer().numParams(underlying(layer)); } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { if(paramKeys == null) { - Layer u = underlying(layer); + LayerConfiguration u = underlying(layer); List orig = u.initializer().paramKeys(u); paramKeys = withPrefixes(orig); } @@ -75,9 +67,9 @@ public class BidirectionalParamInitializer implements ParamInitializer { } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { if(weightKeys == null) { - Layer u = underlying(layer); + LayerConfiguration u = underlying(layer); List orig = u.initializer().weightKeys(u); weightKeys = withPrefixes(orig); } @@ -85,9 +77,9 @@ public class BidirectionalParamInitializer implements ParamInitializer { } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { if(biasKeys == null) { - Layer u = underlying(layer); + LayerConfiguration u = underlying(layer); List orig = u.initializer().weightKeys(u); biasKeys = withPrefixes(orig); } @@ -95,27 +87,27 @@ public class BidirectionalParamInitializer implements ParamInitializer { } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return weightKeys(this.layer).contains(key); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return biasKeys(this.layer).contains(key); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { val n = paramsView.length()/2; INDArray forwardView = paramsView.get(interval(0,0,true), interval(0, n)); INDArray backwardView = paramsView.get(interval(0,0,true), interval(n, 2*n)); conf.clearVariables(); - NeuralNetConfiguration c1 = conf.clone(); - NeuralNetConfiguration c2 = conf.clone(); - c1.setLayer(underlying); - c2.setLayer(underlying); + LayerConfiguration c1 = conf.clone(); + LayerConfiguration c2 = conf.clone(); + //c1.setLayer(underlying); + //c2.setLayer(underlying); Map origFwd = underlying.initializer().init(c1, forwardView, initializeParams); Map origBwd = underlying.initializer().init(c2, backwardView, initializeParams); List variables = addPrefixes(c1.getVariables(), c2.getVariables()); @@ -156,7 +148,7 @@ public class BidirectionalParamInitializer implements ParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { val n = gradientView.length()/2; INDArray forwardView = gradientView.get(interval(0,0,true), interval(0, n)); INDArray backwardView = 
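numParams above doubles the wrapped layer's count, and the key lists are rebuilt with the FORWARD_PREFIX/BACKWARD_PREFIX constants. A rough sketch of that doubling; the underlying keys and their ordering are illustrative, not the initializer's exact output:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class BidirectionalKeySketch {
        public static void main(String[] args) {
            List<String> underlying = Arrays.asList("W", "RW", "b"); // hypothetical keys of the wrapped layer
            List<String> prefixed = new ArrayList<>();
            for (String k : underlying) prefixed.add("f" + k);       // forward copy
            for (String k : underlying) prefixed.add("b" + k);       // backward copy
            System.out.println(prefixed);        // [fW, fRW, fb, bW, bRW, bb]
            System.out.println(prefixed.size()); // 6 = 2 x 3, mirroring numParams = 2 * underlying
        }
    }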
gradientView.get(interval(0,0,true), interval(n, 2*n)); @@ -175,7 +167,7 @@ public class BidirectionalParamInitializer implements ParamInitializer { return out; } - private Layer underlying(Layer layer){ + private LayerConfiguration underlying(LayerConfiguration layer){ Bidirectional b = (Bidirectional)layer; return b.getFwd(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/CenterLossParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/CenterLossParamInitializer.java index 65df1fea5..8a02c397e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/CenterLossParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/CenterLossParamInitializer.java @@ -22,7 +22,9 @@ package org.deeplearning4j.nn.params; import lombok.val; +import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; @@ -43,20 +45,20 @@ public class CenterLossParamInitializer extends DefaultParamInitializer { public final static String CENTER_KEY = "cL"; @Override - public long numParams(NeuralNetConfiguration conf) { + public long numParams(LayerConfiguration conf) { org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf = - (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf; val nIn = layerConf.getNIn(); val nOut = layerConf.getNOut(); // also equal to numClasses return nIn * nOut + nOut + nIn * nOut; //weights + bias + embeddings } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { Map params = Collections.synchronizedMap(new LinkedHashMap()); org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer layerConf = - (org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer) conf; val nIn = layerConf.getNIn(); val nOut = layerConf.getNOut(); // also equal to numClasses @@ -81,9 +83,9 @@ public class CenterLossParamInitializer extends DefaultParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer layerConf = - (org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer) conf; val nIn = layerConf.getNIn(); val nOut = layerConf.getNOut(); // also equal to numClasses @@ -107,10 +109,10 @@ public class CenterLossParamInitializer extends DefaultParamInitializer { } - protected INDArray createCenterLossMatrix(NeuralNetConfiguration conf, INDArray centerLossView, + protected INDArray createCenterLossMatrix(LayerConfiguration conf, INDArray centerLossView, boolean initializeParameters) { org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer layerConf = - (org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer) conf; if (initializeParameters) { centerLossView.assign(0.0); diff --git 
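The numParams formula visible above for the center-loss output layer (weights + bias + per-class centers) works out as follows for an illustrative 128-feature, 10-class layer:

    public class CenterLossParamCount {
        public static void main(String[] args) {
            long nIn = 128, nOut = 10;            // nOut doubles as the number of classes
            long weights = nIn * nOut;            // standard output-layer weights
            long bias = nOut;                     // one bias per class
            long centers = nIn * nOut;            // one learned center (embedding) per class
            System.out.println(weights + bias + centers); // 2570 = nIn*nOut + nOut + nIn*nOut
        }
    }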
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java index 8ebabb433..745e77a69 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java @@ -24,7 +24,7 @@ package org.deeplearning4j.nn.params; import lombok.val; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Convolution3D; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; @@ -44,13 +44,9 @@ public class Convolution3DParamInitializer extends ConvolutionParamInitializer { public final static String WEIGHT_KEY = DefaultParamInitializer.WEIGHT_KEY; public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY; - @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } @Override - public long numParams(Layer l) { + public long numParams(LayerConfiguration l) { Convolution3D layerConf = (Convolution3D) l; @@ -62,13 +58,13 @@ public class Convolution3DParamInitializer extends ConvolutionParamInitializer { @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - Convolution3D layer = (Convolution3D) conf.getLayer(); + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + Convolution3D layer = (Convolution3D) conf; if (layer.getKernelSize().length != 3) throw new IllegalArgumentException("Filter size must be == 3"); Map params = Collections.synchronizedMap(new LinkedHashMap()); - Convolution3D layerConf = (Convolution3D) conf.getLayer(); + Convolution3D layerConf = (Convolution3D) conf; val nOut = layerConf.getNOut(); if (layer.hasBias()) { @@ -88,9 +84,9 @@ public class Convolution3DParamInitializer extends ConvolutionParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { - Convolution3D layerConf = (Convolution3D) conf.getLayer(); + Convolution3D layerConf = (Convolution3D) conf; int[] kernel = layerConf.getKernelSize(); val nIn = layerConf.getNIn(); @@ -112,7 +108,7 @@ public class Convolution3DParamInitializer extends ConvolutionParamInitializer { } - protected INDArray createWeightMatrix(NeuralNetConfiguration conf, INDArray weightView, boolean initializeParams) { + protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) { /* Create a 5d weight matrix of: (number of kernels, num input channels, kernel depth, kernel height, kernel width) @@ -120,7 +116,7 @@ public class Convolution3DParamInitializer extends ConvolutionParamInitializer { Inputs to the convolution layer are: (batch size, num input feature maps, image depth, image height, image width) */ - Convolution3D layerConf = (Convolution3D) conf.getLayer(); + Convolution3D layerConf = (Convolution3D) conf; if (initializeParams) { int[] kernel = layerConf.getKernelSize(); diff --git 
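Given the 5d weight shape described in createWeightMatrix above (number of kernels, input channels, kernel depth, height, width), the parameter count of a 3D convolution follows directly; a worked sketch with illustrative sizes:

    public class Conv3DParamCount {
        public static void main(String[] args) {
            long nIn = 1, nOut = 8;                   // input channels, number of kernels
            long kD = 3, kH = 3, kW = 3;              // kernel depth, height, width
            long weights = nOut * nIn * kD * kH * kW; // 5d weight tensor
            long bias = nOut;                         // per-kernel bias, when hasBias()
            System.out.println(weights + bias);       // 216 + 8 = 224 parameters
        }
    }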
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java index 4618e2c3e..a8b3ce7aa 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java @@ -22,17 +22,16 @@ package org.deeplearning4j.nn.params; import lombok.val; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; import java.util.*; -public class ConvolutionParamInitializer implements ParamInitializer { +public class ConvolutionParamInitializer extends AbstractParamInitializer { private static final ConvolutionParamInitializer INSTANCE = new ConvolutionParamInitializer(); @@ -45,12 +44,7 @@ public class ConvolutionParamInitializer implements ParamInitializer { public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY; @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer l) { + public long numParams(LayerConfiguration l) { org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf = (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) l; @@ -61,7 +55,7 @@ public class ConvolutionParamInitializer implements ParamInitializer { } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf = (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) layer; if(layerConf.hasBias()){ @@ -72,12 +66,12 @@ public class ConvolutionParamInitializer implements ParamInitializer { } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { return Collections.singletonList(WEIGHT_KEY); } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf = (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) layer; if(layerConf.hasBias()){ @@ -88,24 +82,24 @@ public class ConvolutionParamInitializer implements ParamInitializer { } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return WEIGHT_KEY.equals(key); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return BIAS_KEY.equals(key); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - ConvolutionLayer layer = (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer(); + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + ConvolutionLayer layer = (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf; if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2"); Map params = Collections.synchronizedMap(new 
LinkedHashMap()); org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf = - (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf; val nOut = layerConf.getNOut(); @@ -115,23 +109,23 @@ public class ConvolutionParamInitializer implements ParamInitializer { INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf))); params.put(BIAS_KEY, createBias(conf, biasView, initializeParams)); params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams)); - conf.addVariable(WEIGHT_KEY); - conf.addVariable(BIAS_KEY); - conf.addVariable(BIAS_KEY); + conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY); + conf.getNetConfiguration().addNetWideVariable(BIAS_KEY); + conf.getNetConfiguration().addNetWideVariable(BIAS_KEY); } else { INDArray weightView = paramsView; params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams)); - conf.addVariable(WEIGHT_KEY); + conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY); } return params; } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf = - (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf; int[] kernel = layerConf.getKernelSize(); val nIn = layerConf.getNIn(); @@ -154,17 +148,17 @@ public class ConvolutionParamInitializer implements ParamInitializer { } //1 bias per feature map - protected INDArray createBias(NeuralNetConfiguration conf, INDArray biasView, boolean initializeParams) { + protected INDArray createBias(LayerConfiguration conf, INDArray biasView, boolean initializeParams) { //the bias is a 1D tensor -- one bias per output feature map org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf = - (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf; if (initializeParams) biasView.assign(layerConf.getBiasInit()); return biasView; } - protected INDArray createWeightMatrix(NeuralNetConfiguration conf, INDArray weightView, boolean initializeParams) { + protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) { /* Create a 4d weight matrix of: (number of kernels, num input channels, kernel height, kernel width) @@ -173,7 +167,7 @@ public class ConvolutionParamInitializer implements ParamInitializer { (batch size, num input feature maps, image height, image width) */ org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf = - (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf; if (initializeParams) { int[] kernel = layerConf.getKernelSize(); int[] stride = layerConf.getStride(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java index 8169fec5f..7f8b8e9e6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java @@ -24,7 +24,7 @@ package 
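The init method above carves the flattened view into one bias per output feature map followed by a 4d weight tensor of shape (number of kernels, input channels, kernel height, kernel width); a worked count with illustrative sizes:

    public class Conv2DParamCount {
        public static void main(String[] args) {
            long nIn = 3, nOut = 16;              // input channels, output channels ("number of kernels")
            long kH = 3, kW = 3;                  // kernel height and width
            long weights = nOut * nIn * kH * kW;  // 4d weight tensor (nOut, nIn, kH, kW)
            long bias = nOut;                     // one bias per output feature map, when hasBias()
            System.out.println(weights + bias);   // 432 + 16 = 448 parameters
        }
    }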
org.deeplearning4j.nn.params; import lombok.val; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.Deconvolution3D; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; @@ -45,12 +45,7 @@ public class Deconvolution3DParamInitializer extends ConvolutionParamInitializer public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY; @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer l) { + public long numParams(LayerConfiguration l) { Deconvolution3D layerConf = (Deconvolution3D) l; int[] kernel = layerConf.getKernelSize(); @@ -61,13 +56,13 @@ public class Deconvolution3DParamInitializer extends ConvolutionParamInitializer @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - Deconvolution3D layer = (Deconvolution3D) conf.getLayer(); + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + Deconvolution3D layer = (Deconvolution3D) conf; if (layer.getKernelSize().length != 3) throw new IllegalArgumentException("Filter size must be == 3"); Map params = Collections.synchronizedMap(new LinkedHashMap()); - Deconvolution3D layerConf = (Deconvolution3D) conf.getLayer(); + Deconvolution3D layerConf = (Deconvolution3D) conf; val nOut = layerConf.getNOut(); if (layer.hasBias()) { @@ -87,9 +82,9 @@ public class Deconvolution3DParamInitializer extends ConvolutionParamInitializer } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { - Deconvolution3D layerConf = (Deconvolution3D) conf.getLayer(); + Deconvolution3D layerConf = (Deconvolution3D) conf; int[] kernel = layerConf.getKernelSize(); val nIn = layerConf.getNIn(); @@ -111,7 +106,7 @@ public class Deconvolution3DParamInitializer extends ConvolutionParamInitializer } - protected INDArray createWeightMatrix(NeuralNetConfiguration conf, INDArray weightView, boolean initializeParams) { + protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) { /* Create a 5d weight matrix of: (number of kernels, num input channels, kernel depth, kernel height, kernel width) @@ -119,7 +114,7 @@ public class Deconvolution3DParamInitializer extends ConvolutionParamInitializer Inputs to the convolution layer are: (batch size, num input feature maps, image depth, image height, image width) */ - Deconvolution3D layerConf = (Deconvolution3D) conf.getLayer(); + Deconvolution3D layerConf = (Deconvolution3D) conf; if (initializeParams) { int[] kernel = layerConf.getKernelSize(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java index a39a1f454..1c7ac91d9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java @@ -22,6 +22,7 @@ package org.deeplearning4j.nn.params; import lombok.val; import 
org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; @@ -38,7 +39,7 @@ public class DeconvolutionParamInitializer extends ConvolutionParamInitializer { } @Override - protected INDArray createWeightMatrix(NeuralNetConfiguration conf, INDArray weightView, boolean initializeParams) { + protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) { /* Create a 4d weight matrix of: (number of kernels, num input channels, kernel height, kernel width) @@ -47,7 +48,7 @@ public class DeconvolutionParamInitializer extends ConvolutionParamInitializer { (batch size, num input feature maps, image height, image width) */ org.deeplearning4j.nn.conf.layers.Deconvolution2D layerConf = - (org.deeplearning4j.nn.conf.layers.Deconvolution2D) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.Deconvolution2D) conf; if (initializeParams) { int[] kernel = layerConf.getKernelSize(); int[] stride = layerConf.getStride(); @@ -76,10 +77,10 @@ public class DeconvolutionParamInitializer extends ConvolutionParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { org.deeplearning4j.nn.conf.layers.Deconvolution2D layerConf = - (org.deeplearning4j.nn.conf.layers.Deconvolution2D) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.Deconvolution2D) conf; int[] kernel = layerConf.getKernelSize(); val nIn = layerConf.getNIn(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java index b41f05b4e..c20562223 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java @@ -20,18 +20,20 @@ package org.deeplearning4j.nn.params; +import lombok.NonNull; +import lombok.extern.slf4j.Slf4j; import lombok.val; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitUtil; +import org.deeplearning4j.nn.weights.WeightInitXavier; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; import java.util.*; - -public class DefaultParamInitializer implements ParamInitializer { +@Slf4j +public class DefaultParamInitializer extends AbstractParamInitializer { private static final DefaultParamInitializer INSTANCE = new DefaultParamInitializer(); @@ -44,12 +46,7 @@ public class DefaultParamInitializer implements ParamInitializer { public final static String GAIN_KEY = "g"; @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer l) { + public long numParams(LayerConfiguration l) { FeedForwardLayer layerConf = (FeedForwardLayer) l; val nIn = layerConf.getNIn(); val nOut = layerConf.getNOut(); @@ -57,7 +54,7 @@ public class DefaultParamInitializer implements ParamInitializer { 
} @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { final ArrayList keys = new ArrayList<>(3); keys.addAll(weightKeys(layer)); keys.addAll(biasKeys(layer)); @@ -65,7 +62,7 @@ public class DefaultParamInitializer implements ParamInitializer { } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { if(hasLayerNorm(layer)){ return Arrays.asList(WEIGHT_KEY, GAIN_KEY); } @@ -73,7 +70,7 @@ public class DefaultParamInitializer implements ParamInitializer { } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { if(hasBias(layer)){ return Collections.singletonList(BIAS_KEY); } else { @@ -83,19 +80,19 @@ public class DefaultParamInitializer implements ParamInitializer { @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return WEIGHT_KEY.equals(key) || (hasLayerNorm(layer) && GAIN_KEY.equals(key)); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return BIAS_KEY.equals(key); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer)) - throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName()); + public Map init(@NonNull LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + if (!(conf instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer)) + throw new IllegalArgumentException("unsupported layer type: " + conf.getClass().getName()); Map params = Collections.synchronizedMap(new LinkedHashMap()); @@ -105,22 +102,22 @@ public class DefaultParamInitializer implements ParamInitializer { "Expected params view of length " + length + ", got length " + paramsView.length()); org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf = - (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf; val nIn = layerConf.getNIn(); val nOut = layerConf.getNOut(); val nWeightParams = nIn * nOut; INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nWeightParams)); - params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams)); - conf.addVariable(WEIGHT_KEY); + params.put(WEIGHT_KEY, createWeightMatrix(layerConf, weightView, initializeParams)); + layerConf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY); long offset = nWeightParams; if(hasBias(layerConf)){ INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(offset, offset + nOut)); - params.put(BIAS_KEY, createBias(conf, biasView, initializeParams)); - conf.addVariable(BIAS_KEY); + params.put(BIAS_KEY, createBias(layerConf, biasView, initializeParams)); + layerConf.getNetConfiguration().addNetWideVariable(BIAS_KEY); offset += nOut; } @@ -128,16 +125,16 @@ public class DefaultParamInitializer implements ParamInitializer { INDArray gainView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(offset, offset + nOut)); params.put(GAIN_KEY, createGain(conf, gainView, initializeParams)); - conf.addVariable(GAIN_KEY); + conf.getNetConfiguration().addNetWideVariable(GAIN_KEY); } return params; } @Override - public Map 
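For the feed-forward case handled above, the flattened parameter view is weights first, then bias, then (when layer norm is enabled) a gain vector; a worked count for an illustrative dense layer without layer norm:

    public class DenseLayerParamCount {
        public static void main(String[] args) {
            long nIn = 784, nOut = 256;          // illustrative dense layer
            long weights = nIn * nOut;           // occupies indices [0, nIn*nOut) of the view
            long bias = nOut;                    // next nOut entries, when the layer has a bias
            System.out.println(weights + bias);  // 200704 + 256 = 200960 parameters
        }
    }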
getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf = - (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf; val nIn = layerConf.getNIn(); val nOut = layerConf.getNOut(); val nWeightParams = nIn * nOut; @@ -166,9 +163,9 @@ public class DefaultParamInitializer implements ParamInitializer { } - protected INDArray createBias(NeuralNetConfiguration conf, INDArray biasParamView, boolean initializeParameters) { + protected INDArray createBias(LayerConfiguration conf, INDArray biasParamView, boolean initializeParameters) { org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf = - (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf; return createBias(layerConf.getNOut(), layerConf.getBiasInit(), biasParamView, initializeParameters); } @@ -179,9 +176,9 @@ public class DefaultParamInitializer implements ParamInitializer { return biasParamView; } - protected INDArray createGain(NeuralNetConfiguration conf, INDArray gainParamView, boolean initializeParameters) { + protected INDArray createGain(LayerConfiguration conf, INDArray gainParamView, boolean initializeParameters) { org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf = - (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf; return createGain(layerConf.getNOut(), layerConf.getGainInit(), gainParamView, initializeParameters); } @@ -193,12 +190,18 @@ public class DefaultParamInitializer implements ParamInitializer { } - protected INDArray createWeightMatrix(NeuralNetConfiguration conf, INDArray weightParamView, + protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightParamView, boolean initializeParameters) { org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf = - (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf; if (initializeParameters) { + if( layerConf.getWeightInitFn() == null) { + // set a default and set warning + layerConf.setWeightInitFn(new WeightInitXavier()); + log.warn("Weight Initializer function was not set on layer {} of class {}, it will default to {}", conf.getLayerName(), + conf.getClass().getSimpleName(), WeightInitXavier.class.getSimpleName()); + } return createWeightMatrix(layerConf.getNIn(), layerConf.getNOut(), layerConf.getWeightInitFn(), weightParamView, true); } else { @@ -206,7 +209,8 @@ public class DefaultParamInitializer implements ParamInitializer { } } - protected INDArray createWeightMatrix(long nIn, long nOut, IWeightInit weightInit, + protected INDArray createWeightMatrix(long nIn, long nOut, + @NonNull IWeightInit weightInit, INDArray weightParamView, boolean initializeParameters) { val shape = new long[] {nIn, nOut}; @@ -220,7 +224,7 @@ public class DefaultParamInitializer implements ParamInitializer { } } - protected boolean hasBias(Layer layer){ + protected boolean hasBias(LayerConfiguration layer){ if(layer instanceof BaseOutputLayer ) { return ((BaseOutputLayer) layer).hasBias(); } else if(layer instanceof DenseLayer){ @@ -233,7 +237,7 @@ public class DefaultParamInitializer implements ParamInitializer { return true; } - protected boolean hasLayerNorm(Layer layer){ + protected 
boolean hasLayerNorm(LayerConfiguration layer){ if(layer instanceof DenseLayer){ return ((DenseLayer) layer).hasLayerNorm(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java index b9f682818..72f2ac6ba 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java @@ -22,17 +22,18 @@ package org.deeplearning4j.nn.params; import lombok.val; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DepthwiseConvolution2D; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; import java.util.*; -public class DepthwiseConvolutionParamInitializer implements ParamInitializer { +public class DepthwiseConvolutionParamInitializer extends AbstractParamInitializer { private static final DepthwiseConvolutionParamInitializer INSTANCE = new DepthwiseConvolutionParamInitializer(); @@ -44,12 +45,7 @@ public class DepthwiseConvolutionParamInitializer implements ParamInitializer { public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY; @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer l) { + public long numParams(LayerConfiguration l) { DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) l; val depthWiseParams = numDepthWiseParams(layerConf); @@ -79,7 +75,7 @@ public class DepthwiseConvolutionParamInitializer implements ParamInitializer { } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) layer; if(layerConf.hasBias()){ @@ -90,12 +86,12 @@ public class DepthwiseConvolutionParamInitializer implements ParamInitializer { } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { return Collections.singletonList(WEIGHT_KEY); } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) layer; if(layerConf.hasBias()){ @@ -106,23 +102,23 @@ public class DepthwiseConvolutionParamInitializer implements ParamInitializer { } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return WEIGHT_KEY.equals(key); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return BIAS_KEY.equals(key); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - DepthwiseConvolution2D layer = (DepthwiseConvolution2D) conf.getLayer(); + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + DepthwiseConvolution2D layer = (DepthwiseConvolution2D) conf; if (layer.getKernelSize().length != 2) throw new 
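For the depthwise case that follows, the weight tensor has shape (depth multiplier, input channels, kernel height, kernel width) per the comment in createDepthWiseWeightMatrix below, and the output channel count is conventionally nIn * depthMultiplier; a worked sketch under that assumption:

    public class DepthwiseConvParamCount {
        public static void main(String[] args) {
            long nIn = 32, depthMultiplier = 2;               // input channels and channel multiplier
            long kH = 3, kW = 3;                              // kernel size
            long depthWise = depthMultiplier * nIn * kH * kW; // weight tensor (depthMultiplier, nIn, kH, kW)
            long nOut = nIn * depthMultiplier;                // assumed output channel count
            long bias = nOut;                                 // when the layer has a bias
            System.out.println(depthWise + bias);             // 576 + 64 = 640 parameters
        }
    }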
IllegalArgumentException("Filter size must be == 2"); Map params = Collections.synchronizedMap(new LinkedHashMap()); - DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) conf.getLayer(); + DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) conf; val depthWiseParams = numDepthWiseParams(layerConf); val biasParams = numBiasParams(layerConf); @@ -143,9 +139,9 @@ public class DepthwiseConvolutionParamInitializer implements ParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { - DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) conf.getLayer(); + DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) conf; int[] kernel = layerConf.getKernelSize(); val nIn = layerConf.getNIn(); @@ -169,21 +165,21 @@ public class DepthwiseConvolutionParamInitializer implements ParamInitializer { return out; } - protected INDArray createBias(NeuralNetConfiguration conf, INDArray biasView, boolean initializeParams) { - DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) conf.getLayer(); + protected INDArray createBias(LayerConfiguration conf, INDArray biasView, boolean initializeParams) { + DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) conf; if (initializeParams) biasView.assign(layerConf.getBiasInit()); return biasView; } - protected INDArray createDepthWiseWeightMatrix(NeuralNetConfiguration conf, INDArray weightView, boolean initializeParams) { + protected INDArray createDepthWiseWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) { /* Create a 4d weight matrix of: (channels multiplier, num input channels, kernel height, kernel width) Inputs to the convolution layer are: (batch size, num input feature maps, image height, image width) */ DepthwiseConvolution2D layerConf = - (DepthwiseConvolution2D) conf.getLayer(); + (DepthwiseConvolution2D) conf; int depthMultiplier = layerConf.getDepthMultiplier(); if (initializeParams) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ElementWiseParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ElementWiseParamInitializer.java index 7245d6dab..665a47d7f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ElementWiseParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ElementWiseParamInitializer.java @@ -23,7 +23,7 @@ package org.deeplearning4j.nn.params; import lombok.val; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; @@ -41,7 +41,7 @@ public class ElementWiseParamInitializer extends DefaultParamInitializer{ } @Override - public long numParams(Layer layer) { + public long numParams(LayerConfiguration layer) { FeedForwardLayer layerConf = (FeedForwardLayer) layer; val nIn = layerConf.getNIn(); return nIn*2; //weights + bias @@ -57,9 +57,9 @@ public class ElementWiseParamInitializer extends DefaultParamInitializer{ * @return Map of parameters keyed by type (view of the 'paramsView' array) */ @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean 
initializeParams) { - if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer)) - throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName()); + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + if (!(conf instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer)) + throw new IllegalArgumentException("unsupported layer type: " + conf.getClass().getName()); Map params = Collections.synchronizedMap(new LinkedHashMap()); @@ -69,7 +69,7 @@ public class ElementWiseParamInitializer extends DefaultParamInitializer{ "Expected params view of length " + length + ", got length " + paramsView.length()); org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf = - (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf; val nIn = layerConf.getNIn(); val nWeightParams = nIn ; @@ -96,9 +96,9 @@ public class ElementWiseParamInitializer extends DefaultParamInitializer{ * @return A map containing an array by parameter type, that is a view of the full network gradients array */ @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf = - (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf; val nIn = layerConf.getNIn(); val nOut = layerConf.getNOut(); val nWeightParams = nIn ; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java index 7ec9ea885..28d458e78 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java @@ -20,9 +20,10 @@ package org.deeplearning4j.nn.params; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collections; @@ -32,7 +33,7 @@ import java.util.Map; /** * @author Adam Gibson */ -public class EmptyParamInitializer implements ParamInitializer { +public class EmptyParamInitializer extends AbstractParamInitializer { private static final EmptyParamInitializer INSTANCE = new EmptyParamInitializer(); @@ -41,47 +42,42 @@ public class EmptyParamInitializer implements ParamInitializer { } @Override - public long numParams(NeuralNetConfiguration conf) { + public long numParams(LayerConfiguration layer) { return 0; } @Override - public long numParams(Layer layer) { - return 0; - } - - @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { return Collections.emptyList(); } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { return Collections.emptyList(); } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { return Collections.emptyList(); } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean 
isWeightParam(LayerConfiguration layer, String key) { return false; } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return false; } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { return Collections.EMPTY_MAP; } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { return Collections.emptyMap(); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java index 71bff7702..580d07402 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java @@ -20,81 +20,75 @@ package org.deeplearning4j.nn.params; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; -import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer; -import org.nd4j.linalg.api.ndarray.INDArray; - import java.util.Collections; import java.util.List; import java.util.Map; +import org.deeplearning4j.nn.api.AbstractParamInitializer; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer; +import org.nd4j.linalg.api.ndarray.INDArray; -public class FrozenLayerParamInitializer implements ParamInitializer { +public class FrozenLayerParamInitializer extends AbstractParamInitializer { - private static final FrozenLayerParamInitializer INSTANCE = new FrozenLayerParamInitializer(); + private static final FrozenLayerParamInitializer INSTANCE = new FrozenLayerParamInitializer(); - public static FrozenLayerParamInitializer getInstance() { - return INSTANCE; - } + public static FrozenLayerParamInitializer getInstance() { + return INSTANCE; + } - @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } + @Override + public long numParams(LayerConfiguration layer) { + FrozenLayer fl = (FrozenLayer) layer; + ParamInitializer initializer = fl.getInnerConfiguration().initializer(); + return initializer.numParams(fl.getInnerConfiguration()); + } - @Override - public long numParams(Layer layer) { - FrozenLayer fl = (FrozenLayer) layer; - ParamInitializer initializer = fl.getLayer().initializer(); - return initializer.numParams(fl.getLayer()); - } + @Override + public List paramKeys(LayerConfiguration layer) { + return Collections.emptyList(); + } - @Override - public List paramKeys(Layer layer) { - return Collections.emptyList(); - } + @Override + public List weightKeys(LayerConfiguration layer) { + return Collections.emptyList(); + } - @Override - public List weightKeys(Layer layer) { - return Collections.emptyList(); - } + @Override + public List biasKeys(LayerConfiguration layer) { + return Collections.emptyList(); + } - @Override - public List biasKeys(Layer layer) { - return Collections.emptyList(); - } + @Override + public boolean 
isWeightParam(LayerConfiguration layer, String key) { + return false; + } - @Override - public boolean isWeightParam(Layer layer, String key) { - return false; - } + @Override + public boolean isBiasParam(LayerConfiguration layer, String key) { + return false; + } - @Override - public boolean isBiasParam(Layer layer, String key) { - return false; - } + @Override + public Map init(LayerConfiguration conf, INDArray paramsView, + boolean initializeParams) { + FrozenLayer fl_conf = (FrozenLayer) conf; + LayerConfiguration innerLayer = fl_conf.getInnerConfiguration(); + ParamInitializer initializer = innerLayer.initializer(); + fl_conf.setInnerConfiguration(innerLayer); + Map m = initializer.init(conf, paramsView, initializeParams); + return m; + } - @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - FrozenLayer fl = (FrozenLayer) conf.getLayer(); - Layer innerLayer = fl.getLayer(); - ParamInitializer initializer = innerLayer.initializer(); - conf.setLayer(innerLayer); - Map m = initializer.init(conf, paramsView, initializeParams); - conf.setLayer(fl); - - return m; - } - - @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { - FrozenLayer fl = (FrozenLayer) conf.getLayer(); - Layer innerLayer = fl.getLayer(); - ParamInitializer initializer = innerLayer.initializer(); - conf.setLayer(innerLayer); - Map m = initializer.getGradientsFromFlattened(conf, gradientView); - conf.setLayer(fl); - return m; - } + @Override + public Map getGradientsFromFlattened(LayerConfiguration conf, + INDArray gradientView) { + FrozenLayer fl = (FrozenLayer) conf; + LayerConfiguration innerLayer = fl.getInnerConfiguration(); + ParamInitializer initializer = innerLayer.initializer(); + fl.setInnerConfiguration(innerLayer); + Map m = initializer.getGradientsFromFlattened(conf, gradientView); + return m; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerWithBackpropParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerWithBackpropParamInitializer.java index 5aa01b3e4..1328e28d9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerWithBackpropParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerWithBackpropParamInitializer.java @@ -20,10 +20,10 @@ package org.deeplearning4j.nn.params; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; -import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop; import org.nd4j.linalg.api.ndarray.INDArray; @@ -31,7 +31,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; -public class FrozenLayerWithBackpropParamInitializer implements ParamInitializer { +public class FrozenLayerWithBackpropParamInitializer extends AbstractParamInitializer { private static final FrozenLayerWithBackpropParamInitializer INSTANCE = new FrozenLayerWithBackpropParamInitializer(); @@ -40,62 +40,54 @@ public class FrozenLayerWithBackpropParamInitializer implements ParamInitializer } @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public 
long numParams(Layer layer) { + public long numParams(LayerConfiguration layer) { FrozenLayerWithBackprop fl = (FrozenLayerWithBackprop) layer; ParamInitializer initializer = fl.getUnderlying().initializer(); return initializer.numParams(fl.getUnderlying()); } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { return Collections.emptyList(); } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { return Collections.emptyList(); } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { return Collections.emptyList(); } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return false; } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return false; } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - FrozenLayerWithBackprop fl = (FrozenLayerWithBackprop) conf.getLayer(); - Layer innerLayer = fl.getUnderlying(); + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + FrozenLayerWithBackprop fl = (FrozenLayerWithBackprop) conf; + LayerConfiguration innerLayer = fl.getUnderlying(); ParamInitializer initializer = innerLayer.initializer(); - conf.setLayer(innerLayer); + fl.setUnderlying(innerLayer); Map m = initializer.init(conf, paramsView, initializeParams); - conf.setLayer(fl); - return m; } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { - FrozenLayerWithBackprop fl = (FrozenLayerWithBackprop) conf.getLayer(); - Layer innerLayer = fl.getUnderlying(); + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { + FrozenLayerWithBackprop fl = (FrozenLayerWithBackprop) conf; + LayerConfiguration innerLayer = fl.getUnderlying(); ParamInitializer initializer = innerLayer.initializer(); - conf.setLayer(innerLayer); + fl.setUnderlying(innerLayer); Map m = initializer.getGradientsFromFlattened(conf, gradientView); - conf.setLayer(fl); return m; } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java index de437ee6d..5239a6c2c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java @@ -21,9 +21,10 @@ package org.deeplearning4j.nn.params; import lombok.val; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; @@ -33,7 +34,7 @@ import org.nd4j.linalg.indexing.NDArrayIndex; import java.util.*; -public class GravesBidirectionalLSTMParamInitializer implements ParamInitializer { +public class GravesBidirectionalLSTMParamInitializer extends AbstractParamInitializer { private static final 
GravesBidirectionalLSTMParamInitializer INSTANCE = new GravesBidirectionalLSTMParamInitializer(); @@ -61,12 +62,7 @@ public class GravesBidirectionalLSTMParamInitializer implements ParamInitializer BIAS_KEY_BACKWARDS)); @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer l) { + public long numParams(LayerConfiguration l) { org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM layerConf = (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) l; @@ -81,37 +77,37 @@ public class GravesBidirectionalLSTMParamInitializer implements ParamInitializer } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { return ALL_PARAM_KEYS; } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { return WEIGHT_KEYS; } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { return BIAS_KEYS; } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return RECURRENT_WEIGHT_KEY_FORWARDS.equals(key) || INPUT_WEIGHT_KEY_FORWARDS.equals(key) || RECURRENT_WEIGHT_KEY_BACKWARDS.equals(key) || INPUT_WEIGHT_KEY_BACKWARDS.equals(key); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return BIAS_KEY_FORWARDS.equals(key) || BIAS_KEY_BACKWARDS.equals(key); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { Map params = Collections.synchronizedMap(new LinkedHashMap()); org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM layerConf = - (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) conf; double forgetGateInit = layerConf.getForgetGateBiasInit(); val nL = layerConf.getNOut(); //i.e., n neurons in this layer @@ -187,9 +183,9 @@ public class GravesBidirectionalLSTMParamInitializer implements ParamInitializer @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM layerConf = - (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) conf; val nL = layerConf.getNOut(); //i.e., n neurons in this layer val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java index 37e4d1cdf..5c59e5f7e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java @@ -21,9 +21,10 @@ package org.deeplearning4j.nn.params; import lombok.val; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; +import 
org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; @@ -33,7 +34,7 @@ import org.nd4j.linalg.indexing.NDArrayIndex; import java.util.*; -public class GravesLSTMParamInitializer implements ParamInitializer { +public class GravesLSTMParamInitializer extends AbstractParamInitializer { private static final GravesLSTMParamInitializer INSTANCE = new GravesLSTMParamInitializer(); @@ -47,12 +48,7 @@ public class GravesLSTMParamInitializer implements ParamInitializer { public final static String INPUT_WEIGHT_KEY = LSTMParamInitializer.INPUT_WEIGHT_KEY; @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer l) { + public long numParams(LayerConfiguration l) { org.deeplearning4j.nn.conf.layers.GravesLSTM layerConf = (org.deeplearning4j.nn.conf.layers.GravesLSTM) l; val nL = layerConf.getNOut(); //i.e., n neurons in this layer @@ -66,35 +62,35 @@ public class GravesLSTMParamInitializer implements ParamInitializer { } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { return Arrays.asList(INPUT_WEIGHT_KEY, RECURRENT_WEIGHT_KEY, BIAS_KEY); } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { return Arrays.asList(INPUT_WEIGHT_KEY, RECURRENT_WEIGHT_KEY); } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { return Collections.singletonList(BIAS_KEY); } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return RECURRENT_WEIGHT_KEY.equals(key) || INPUT_WEIGHT_KEY.equals(key); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return BIAS_KEY.equals(key); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { Map params = Collections.synchronizedMap(new LinkedHashMap()); org.deeplearning4j.nn.conf.layers.GravesLSTM layerConf = - (org.deeplearning4j.nn.conf.layers.GravesLSTM) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.GravesLSTM) conf; double forgetGateInit = layerConf.getForgetGateBiasInit(); val nL = layerConf.getNOut(); //i.e., n neurons in this layer @@ -157,9 +153,9 @@ public class GravesLSTMParamInitializer implements ParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { org.deeplearning4j.nn.conf.layers.GravesLSTM layerConf = - (org.deeplearning4j.nn.conf.layers.GravesLSTM) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.GravesLSTM) conf; val nL = layerConf.getNOut(); //i.e., n neurons in this layer val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java index 2a7418957..04f12ea32 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java @@ -20,11 +20,16 @@ package org.deeplearning4j.nn.params; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; import lombok.val; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LSTM; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; @@ -32,9 +37,7 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.*; - -public class LSTMParamInitializer implements ParamInitializer { +public class LSTMParamInitializer extends AbstractParamInitializer { private static final LSTMParamInitializer INSTANCE = new LSTMParamInitializer(); @@ -54,12 +57,7 @@ public class LSTMParamInitializer implements ParamInitializer { private static final List BIAS_KEYS = Collections.unmodifiableList(Collections.singletonList(BIAS_KEY)); @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer l) { + public long numParams(LayerConfiguration l) { LSTM layerConf = (LSTM) l; val nL = layerConf.getNOut(); //i.e., n neurons in this layer @@ -73,34 +71,34 @@ public class LSTMParamInitializer implements ParamInitializer { } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { return LAYER_PARAM_KEYS; } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { return WEIGHT_KEYS; } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { return BIAS_KEYS; } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return RECURRENT_WEIGHT_KEY.equals(key) || INPUT_WEIGHT_KEY.equals(key); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return BIAS_KEY.equals(key); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { Map params = Collections.synchronizedMap(new LinkedHashMap()); - org.deeplearning4j.nn.conf.layers.LSTM layerConf = (org.deeplearning4j.nn.conf.layers.LSTM) conf.getLayer(); + org.deeplearning4j.nn.conf.layers.LSTM layerConf = (org.deeplearning4j.nn.conf.layers.LSTM) conf; double forgetGateInit = layerConf.getForgetGateBiasInit(); val nL = layerConf.getNOut(); //i.e., n neurons in this layer @@ -162,8 +160,8 @@ public class LSTMParamInitializer implements ParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { - org.deeplearning4j.nn.conf.layers.LSTM layerConf = (org.deeplearning4j.nn.conf.layers.LSTM) conf.getLayer(); + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { + org.deeplearning4j.nn.conf.layers.LSTM layerConf = 
(org.deeplearning4j.nn.conf.layers.LSTM) conf; val nL = layerConf.getNOut(); //i.e., n neurons in this layer val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java index d0a93e368..3de33be57 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java @@ -21,10 +21,11 @@ package org.deeplearning4j.nn.params; import lombok.val; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.BaseLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.PReLULayer; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitUtil; @@ -36,7 +37,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -public class PReLUParamInitializer implements ParamInitializer { +public class PReLUParamInitializer extends AbstractParamInitializer { public final static String WEIGHT_KEY = "W"; private final long[] weightShape; @@ -58,14 +59,8 @@ public class PReLUParamInitializer implements ParamInitializer { return new PReLUParamInitializer(shape, sharedAxes); } - @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer l) { + public long numParams(LayerConfiguration l) { return numParams(weightShape); } @@ -78,34 +73,34 @@ public class PReLUParamInitializer implements ParamInitializer { } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { return weightKeys(layer); } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { return Collections.singletonList(WEIGHT_KEY); } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { return Collections.emptyList(); } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return WEIGHT_KEY.equals(key); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return false; } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - if (!(conf.getLayer() instanceof BaseLayer)) - throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName()); + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + if (!(conf instanceof BaseLayer)) + throw new IllegalArgumentException("unsupported layer type: " + conf.getClass().getName()); Map params = Collections.synchronizedMap(new LinkedHashMap()); @@ -123,7 +118,7 @@ public class PReLUParamInitializer implements ParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { val length = numParams(conf); INDArray 
weightGradientView = gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, length)) @@ -135,10 +130,10 @@ public class PReLUParamInitializer implements ParamInitializer { } - protected INDArray createWeightMatrix(NeuralNetConfiguration conf, INDArray weightParamView, + protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightParamView, boolean initializeParameters) { - PReLULayer layerConf = (PReLULayer) conf.getLayer(); + PReLULayer layerConf = (PReLULayer) conf; if (initializeParameters) { return layerConf.getWeightInitFn().init(layerConf.getNIn(), layerConf.getNOut(), weightShape, IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, weightParamView); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PretrainParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PretrainParamInitializer.java index c794a452c..4eb87427a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PretrainParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PretrainParamInitializer.java @@ -22,6 +22,7 @@ package org.deeplearning4j.nn.params; import lombok.val; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; @@ -45,18 +46,18 @@ public class PretrainParamInitializer extends DefaultParamInitializer { public final static String VISIBLE_BIAS_KEY = "v" + DefaultParamInitializer.BIAS_KEY; @Override - public long numParams(NeuralNetConfiguration conf) { + public long numParams(LayerConfiguration conf) { org.deeplearning4j.nn.conf.layers.BasePretrainNetwork layerConf = - (org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf; return super.numParams(conf) + layerConf.getNIn(); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { Map params = super.init(conf, paramsView, initializeParams); org.deeplearning4j.nn.conf.layers.BasePretrainNetwork layerConf = - (org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf; val nIn = layerConf.getNIn(); val nOut = layerConf.getNOut(); val nWeightParams = nIn * nOut; @@ -69,10 +70,10 @@ public class PretrainParamInitializer extends DefaultParamInitializer { return params; } - protected INDArray createVisibleBias(NeuralNetConfiguration conf, INDArray visibleBiasView, + protected INDArray createVisibleBias(LayerConfiguration conf, INDArray visibleBiasView, boolean initializeParameters) { org.deeplearning4j.nn.conf.layers.BasePretrainNetwork layerConf = - (org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf; if (initializeParameters) { INDArray ret = Nd4j.valueArrayOf(new long[]{1, layerConf.getNIn()}, layerConf.getVisibleBiasInit()); visibleBiasView.assign(ret); @@ -82,10 +83,10 @@ public class PretrainParamInitializer extends DefaultParamInitializer { @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { Map out = 
super.getGradientsFromFlattened(conf, gradientView); org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf = - (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer(); + (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf; val nIn = layerConf.getNIn(); val nOut = layerConf.getNOut(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java index 2b9c3484c..0846e0bf5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -22,9 +22,10 @@ package org.deeplearning4j.nn.params; import lombok.extern.slf4j.Slf4j; import lombok.val; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffVertex; import org.nd4j.linalg.api.ndarray.INDArray; @@ -38,7 +39,7 @@ import java.util.Map; import static org.nd4j.linalg.indexing.NDArrayIndex.interval; @Slf4j -public class SameDiffParamInitializer implements ParamInitializer { +public class SameDiffParamInitializer extends AbstractParamInitializer { private static final SameDiffParamInitializer INSTANCE = new SameDiffParamInitializer(); @@ -47,12 +48,7 @@ public class SameDiffParamInitializer implements ParamInitializer { } @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer layer) { + public long numParams(LayerConfiguration layer) { AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; Map m = sd.getLayerParams().getParamShapes(); int n = 0; @@ -63,36 +59,36 @@ public class SameDiffParamInitializer implements ParamInitializer { } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; return sd.getLayerParams().getParameterKeys(); } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; return sd.getLayerParams().getWeightParameterKeys(); } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { AbstractSameDiffLayer sd = (AbstractSameDiffLayer)layer; return sd.getLayerParams().getBiasParameterKeys(); } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return weightKeys(layer).contains(key); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return biasKeys(layer).contains(key); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf.getLayer(); + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf; Map out = 
subsetAndReshape(sd.getLayerParams().getParameterKeys(), sd.getLayerParams().getParamShapes(), paramsView, sd); if(initializeParams){ @@ -107,8 +103,8 @@ public class SameDiffParamInitializer implements ParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { - AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf.getLayer(); + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { + AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf; return subsetAndReshape(sd.getLayerParams().getParameterKeys(), sd.getLayerParams().getParamShapes(), gradientView, sd); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java index bb7dabb4e..9df032560 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java @@ -22,9 +22,10 @@ package org.deeplearning4j.nn.params; import lombok.val; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.SeparableConvolution2D; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; @@ -32,7 +33,7 @@ import org.nd4j.linalg.indexing.NDArrayIndex; import java.util.*; -public class SeparableConvolutionParamInitializer implements ParamInitializer { +public class SeparableConvolutionParamInitializer extends AbstractParamInitializer { private static final SeparableConvolutionParamInitializer INSTANCE = new SeparableConvolutionParamInitializer(); @@ -45,12 +46,7 @@ public class SeparableConvolutionParamInitializer implements ParamInitializer { public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY; @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer l) { + public long numParams(LayerConfiguration l) { SeparableConvolution2D layerConf = (SeparableConvolution2D) l; val depthWiseParams = numDepthWiseParams(layerConf); @@ -96,7 +92,7 @@ public class SeparableConvolutionParamInitializer implements ParamInitializer { } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { SeparableConvolution2D layerConf = (SeparableConvolution2D) layer; if(layerConf.hasBias()){ @@ -107,12 +103,12 @@ public class SeparableConvolutionParamInitializer implements ParamInitializer { } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { return Arrays.asList(DEPTH_WISE_WEIGHT_KEY, POINT_WISE_WEIGHT_KEY); } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { SeparableConvolution2D layerConf = (SeparableConvolution2D) layer; if(layerConf.hasBias()){ @@ -123,23 +119,23 @@ public class SeparableConvolutionParamInitializer implements ParamInitializer { } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return 
DEPTH_WISE_WEIGHT_KEY.equals(key) || POINT_WISE_WEIGHT_KEY.equals(key); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return BIAS_KEY.equals(key); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - SeparableConvolution2D layer = (SeparableConvolution2D) conf.getLayer(); + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + SeparableConvolution2D layer = (SeparableConvolution2D) conf; if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2"); Map params = Collections.synchronizedMap(new LinkedHashMap()); - SeparableConvolution2D layerConf = (SeparableConvolution2D) conf.getLayer(); + SeparableConvolution2D layerConf = (SeparableConvolution2D) conf; val depthWiseParams = numDepthWiseParams(layerConf); val biasParams = numBiasParams(layerConf); @@ -164,10 +160,10 @@ public class SeparableConvolutionParamInitializer implements ParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { SeparableConvolution2D layerConf = - (SeparableConvolution2D) conf.getLayer(); + (SeparableConvolution2D) conf; int[] kernel = layerConf.getKernelSize(); val nIn = layerConf.getNIn(); @@ -195,22 +191,22 @@ public class SeparableConvolutionParamInitializer implements ParamInitializer { return out; } - protected INDArray createBias(NeuralNetConfiguration conf, INDArray biasView, boolean initializeParams) { + protected INDArray createBias(LayerConfiguration conf, INDArray biasView, boolean initializeParams) { SeparableConvolution2D layerConf = - (SeparableConvolution2D) conf.getLayer(); + (SeparableConvolution2D) conf; if (initializeParams) biasView.assign(layerConf.getBiasInit()); return biasView; } - protected INDArray createDepthWiseWeightMatrix(NeuralNetConfiguration conf, INDArray weightView, boolean initializeParams) { + protected INDArray createDepthWiseWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) { /* Create a 4d weight matrix of: (channels multiplier, num input channels, kernel height, kernel width) Inputs to the convolution layer are: (batch size, num input feature maps, image height, image width) */ SeparableConvolution2D layerConf = - (SeparableConvolution2D) conf.getLayer(); + (SeparableConvolution2D) conf; int depthMultiplier = layerConf.getDepthMultiplier(); if (initializeParams) { @@ -233,14 +229,14 @@ public class SeparableConvolutionParamInitializer implements ParamInitializer { } } - protected INDArray createPointWiseWeightMatrix(NeuralNetConfiguration conf, INDArray weightView, + protected INDArray createPointWiseWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) { /* Create a 4d weight matrix of: (num output channels, channels multiplier * num input channels, kernel height, kernel width) */ SeparableConvolution2D layerConf = - (SeparableConvolution2D) conf.getLayer(); + (SeparableConvolution2D) conf; int depthMultiplier = layerConf.getDepthMultiplier(); if (initializeParams) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java index f3fbf1e11..603492afa 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java @@ -21,9 +21,10 @@ package org.deeplearning4j.nn.params; import lombok.val; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn; import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; @@ -31,9 +32,8 @@ import org.nd4j.linalg.api.ndarray.INDArray; import java.util.*; import static org.nd4j.linalg.indexing.NDArrayIndex.interval; -import static org.nd4j.linalg.indexing.NDArrayIndex.point; -public class SimpleRnnParamInitializer implements ParamInitializer { +public class SimpleRnnParamInitializer extends AbstractParamInitializer { private static final SimpleRnnParamInitializer INSTANCE = new SimpleRnnParamInitializer(); @@ -51,12 +51,7 @@ public class SimpleRnnParamInitializer implements ParamInitializer { @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer layer) { + public long numParams(LayerConfiguration layer) { SimpleRnn c = (SimpleRnn)layer; val nIn = c.getNIn(); val nOut = c.getNOut(); @@ -64,7 +59,7 @@ public class SimpleRnnParamInitializer implements ParamInitializer { } @Override - public List paramKeys(Layer layer) { + public List paramKeys(LayerConfiguration layer) { final ArrayList keys = new ArrayList<>(3); keys.addAll(weightKeys(layer)); keys.addAll(biasKeys(layer)); @@ -72,7 +67,7 @@ public class SimpleRnnParamInitializer implements ParamInitializer { } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { final ArrayList keys = new ArrayList<>(WEIGHT_KEYS); if(hasLayerNorm(layer)){ @@ -83,23 +78,23 @@ public class SimpleRnnParamInitializer implements ParamInitializer { } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { return BIAS_KEYS; } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return WEIGHT_KEY.equals(key) || RECURRENT_WEIGHT_KEY.equals(key) || GAIN_KEY.equals(key); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return BIAS_KEY.equals(key); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - SimpleRnn c = (SimpleRnn)conf.getLayer(); + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + SimpleRnn c = (SimpleRnn)conf; val nIn = c.getNIn(); val nOut = c.getNOut(); @@ -140,8 +135,8 @@ public class SimpleRnnParamInitializer implements ParamInitializer { } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { - SimpleRnn c = (SimpleRnn)conf.getLayer(); + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { + SimpleRnn c = (SimpleRnn)conf; val nIn = c.getNIn(); val nOut = c.getNOut(); @@ -172,7 +167,7 @@ public class SimpleRnnParamInitializer implements ParamInitializer { return m; } - protected boolean 
hasLayerNorm(Layer layer){ + protected boolean hasLayerNorm(LayerConfiguration layer){ if(layer instanceof SimpleRnn){ return ((SimpleRnn) layer).hasLayerNorm(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java index 399bf3a47..9284843d5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.params; import lombok.val; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder; import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; @@ -71,8 +71,8 @@ public class VariationalAutoencoderParamInitializer extends DefaultParamInitiali @Override - public long numParams(NeuralNetConfiguration conf) { - VariationalAutoencoder layer = (VariationalAutoencoder) conf.getLayer(); + public long numParams(LayerConfiguration conf) { + VariationalAutoencoder layer = (VariationalAutoencoder) conf; val nIn = layer.getNIn(); val nOut = layer.getNOut(); @@ -116,7 +116,7 @@ public class VariationalAutoencoderParamInitializer extends DefaultParamInitiali } @Override - public List paramKeys(Layer l) { + public List paramKeys(LayerConfiguration l) { VariationalAutoencoder layer = (VariationalAutoencoder) l; int[] encoderLayerSizes = layer.getEncoderLayerSizes(); int[] decoderLayerSizes = layer.getDecoderLayerSizes(); @@ -154,7 +154,7 @@ public class VariationalAutoencoderParamInitializer extends DefaultParamInitiali } @Override - public List weightKeys(Layer layer) { + public List weightKeys(LayerConfiguration layer) { List out = new ArrayList<>(); for(String s : paramKeys(layer)){ if(isWeightParam(layer, s)){ @@ -165,7 +165,7 @@ public class VariationalAutoencoderParamInitializer extends DefaultParamInitiali } @Override - public List biasKeys(Layer layer) { + public List biasKeys(LayerConfiguration layer) { List out = new ArrayList<>(); for(String s : paramKeys(layer)){ if(isBiasParam(layer, s)){ @@ -176,24 +176,24 @@ public class VariationalAutoencoderParamInitializer extends DefaultParamInitiali } @Override - public boolean isWeightParam(Layer layer, String key) { + public boolean isWeightParam(LayerConfiguration layer, String key) { return key.endsWith(WEIGHT_KEY_SUFFIX); } @Override - public boolean isBiasParam(Layer layer, String key) { + public boolean isBiasParam(LayerConfiguration layer, String key) { return key.endsWith(BIAS_KEY_SUFFIX); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { if (paramsView.length() != numParams(conf)) { throw new IllegalArgumentException("Incorrect paramsView length: Expected length " + numParams(conf) + ", got length " + paramsView.length()); } Map ret = new LinkedHashMap<>(); - VariationalAutoencoder layer = (VariationalAutoencoder) conf.getLayer(); + VariationalAutoencoder layer = (VariationalAutoencoder) conf; val nIn = layer.getNIn(); val nOut = layer.getNOut(); @@ -316,9 +316,9 @@ public 
class VariationalAutoencoderParamInitializer extends DefaultParamInitiali } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { Map ret = new LinkedHashMap<>(); - VariationalAutoencoder layer = (VariationalAutoencoder) conf.getLayer(); + VariationalAutoencoder layer = (VariationalAutoencoder) conf; val nIn = layer.getNIn(); val nOut = layer.getNOut(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java index 234226eb4..7cb7059c8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java @@ -20,16 +20,17 @@ package org.deeplearning4j.nn.params; +import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; import org.nd4j.linalg.api.ndarray.INDArray; import java.util.List; import java.util.Map; -public class WrapperLayerParamInitializer implements ParamInitializer { +public class WrapperLayerParamInitializer extends AbstractParamInitializer { private static final WrapperLayerParamInitializer INSTANCE = new WrapperLayerParamInitializer(); @@ -42,67 +43,62 @@ public class WrapperLayerParamInitializer implements ParamInitializer { } @Override - public long numParams(NeuralNetConfiguration conf) { - return numParams(conf.getLayer()); - } - - @Override - public long numParams(Layer layer) { - Layer l = underlying(layer); + public long numParams(LayerConfiguration layer) { + LayerConfiguration l = underlying(layer); return l.initializer().numParams(l); } @Override - public List paramKeys(Layer layer) { - Layer l = underlying(layer); + public List paramKeys(LayerConfiguration layer) { + LayerConfiguration l = underlying(layer); return l.initializer().paramKeys(l); } @Override - public List weightKeys(Layer layer) { - Layer l = underlying(layer); + public List weightKeys(LayerConfiguration layer) { + LayerConfiguration l = underlying(layer); return l.initializer().weightKeys(l); } @Override - public List biasKeys(Layer layer) { - Layer l = underlying(layer); + public List biasKeys(LayerConfiguration layer) { + LayerConfiguration l = underlying(layer); return l.initializer().biasKeys(l); } @Override - public boolean isWeightParam(Layer layer, String key) { - Layer l = underlying(layer); + public boolean isWeightParam(LayerConfiguration layer, String key) { + LayerConfiguration l = underlying(layer); return l.initializer().isWeightParam(layer, key); } @Override - public boolean isBiasParam(Layer layer, String key) { - Layer l = underlying(layer); + public boolean isBiasParam(LayerConfiguration layer, String key) { + LayerConfiguration l = underlying(layer); return l.initializer().isBiasParam(layer, key); } @Override - public Map init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) { - Layer orig = conf.getLayer(); - Layer l = underlying(conf.getLayer()); - conf.setLayer(l); + public Map init(LayerConfiguration conf, INDArray paramsView, 
boolean initializeParams) { + LayerConfiguration orig = conf; + LayerConfiguration l = underlying(conf); + Map m = l.initializer().init(conf, paramsView, initializeParams); - conf.setLayer(orig); + return m; } @Override - public Map getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) { - Layer orig = conf.getLayer(); - Layer l = underlying(conf.getLayer()); - conf.setLayer(l); + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { + LayerConfiguration orig = conf; + LayerConfiguration l = underlying(conf); + Map m = l.initializer().getGradientsFromFlattened(conf, gradientView); - conf.setLayer(orig); + return m; } - private Layer underlying(Layer layer){ + private LayerConfiguration underlying(LayerConfiguration layer){ while (layer instanceof BaseWrapperLayer) { layer = ((BaseWrapperLayer)layer).getUnderlying(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java index 3f2ddd88b..73a31b96b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java @@ -20,23 +20,40 @@ package org.deeplearning4j.nn.transferlearning; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.core.JsonProcessingException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; import lombok.ToString; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.deeplearning4j.nn.conf.*; +import org.deeplearning4j.nn.conf.BackpropType; +import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.GradientNormalization; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.Updater; +import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.dropout.IDropout; -import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.BaseLayer; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerValidation; +import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; import org.deeplearning4j.nn.conf.stepfunctions.StepFunction; import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.weights.WeightInitDistribution; import org.deeplearning4j.util.NetworkUtils; +import org.nd4j.common.primitives.Optional; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.learning.config.IUpdater; @@ -44,14 +61,6 @@ import org.nd4j.linalg.learning.regularization.L1Regularization; import org.nd4j.linalg.learning.regularization.L2Regularization; import org.nd4j.linalg.learning.regularization.Regularization; 
import org.nd4j.linalg.learning.regularization.WeightDecay; -import org.nd4j.common.primitives.Optional; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.fasterxml.jackson.core.JsonProcessingException; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "type") @JsonInclude(JsonInclude.Include.NON_NULL) @@ -60,738 +69,794 @@ import java.util.List; @Data public class FineTuneConfiguration { - protected IActivation activationFn; - protected IWeightInit weightInitFn; - protected Double biasInit; - protected List regularization; - protected List regularizationBias; + protected IActivation activationFn; + protected IWeightInit weightInitFn; + protected Double biasInit; + protected List regularization; + protected List regularizationBias; + protected boolean removeL2 = false; //For: .l2(0.0) -> user means "no l2" so we should remove it if it is present in the original model... + protected boolean removeL2Bias = false; + protected boolean removeL1 = false; + protected boolean removeL1Bias = false; + protected boolean removeWD = false; + protected boolean removeWDBias = false; + protected Optional dropout; + protected Optional weightNoise; + protected IUpdater updater; + protected IUpdater biasUpdater; + protected Boolean miniBatch; + protected Integer maxNumLineSearchIterations; + protected Long seed; + protected OptimizationAlgorithm optimizationAlgo; + protected StepFunction stepFunction; + protected Boolean minimize; + protected Optional gradientNormalization; + protected Double gradientNormalizationThreshold; + protected ConvolutionMode convolutionMode; + protected ConvolutionLayer.AlgoMode cudnnAlgoMode; + protected Optional> constraints; + + protected Boolean pretrain; + protected Boolean backprop; + protected BackpropType backpropType; + protected Integer tbpttFwdLength; + protected Integer tbpttBackLength; + + protected WorkspaceMode trainingWorkspaceMode; + protected WorkspaceMode inferenceWorkspaceMode; + + public static Builder builder() { + return new Builder(); + } + + private static T get(Optional optional) { + if (optional == null) { + return null; + } + return optional.orElse(null); + } + + public static FineTuneConfiguration fromJson(String json) { + try { + return NeuralNetConfiguration.mapper().readValue(json, FineTuneConfiguration.class); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public static FineTuneConfiguration fromYaml(String yaml) { + try { + return NeuralNetConfiguration.mapperYaml().readValue(yaml, FineTuneConfiguration.class); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * public NeuralNetConfiguration appliedNeuralNetConfiguration(NeuralNetConfiguration nnc) { + * applyToNeuralNetConfiguration(nnc); nnc = new + * NeuralNetConfiguration.NeuralNetConfigurationBuilder(nnc.clone()).build(); return nnc; } + **/ + + public void applyToLayerConfiguration(LayerConfiguration layerConfiguration) { + + Updater originalUpdater = null; + WeightInit origWeightInit = null; + + if (layerConfiguration != null) { + //As per NeuralNetConfiguration.configureLayer and LayerValidation.configureBaseLayer: only copy dropout to base layers + // this excludes things like subsampling and activation layers + if (dropout != null && layerConfiguration instanceof BaseLayer) { + IDropout d = dropout.orElse(null); + if (d != null) { + d = 
d.clone(); //Clone to avoid shared state between layers + } + layerConfiguration.setIDropout(d); + } + if (constraints != null) { + layerConfiguration.setConstraints(constraints.orElse(null)); + } + } + + if (layerConfiguration != null && layerConfiguration instanceof BaseLayer) { + BaseLayer bl = (BaseLayer) layerConfiguration; + if (activationFn != null) { + bl.setActivationFn(activationFn); + } + if (weightInitFn != null) { + bl.setWeightInitFn(weightInitFn); + } + if (biasInit != null) { + bl.setBiasInit(biasInit); + } + if (regularization != null && !regularization.isEmpty()) { + bl.setRegularization(regularization); + } + if (regularizationBias != null && !regularizationBias.isEmpty()) { + bl.setRegularizationBias(regularizationBias); + } + if (removeL2) { + NetworkUtils.removeInstances(bl.getRegularization(), L2Regularization.class); + } + if (removeL2Bias) { + NetworkUtils.removeInstances(bl.getRegularizationBias(), L2Regularization.class); + } + if (removeL1) { + NetworkUtils.removeInstances(bl.getRegularization(), L1Regularization.class); + } + if (removeL1Bias) { + NetworkUtils.removeInstances(bl.getRegularizationBias(), L1Regularization.class); + } + if (removeWD) { + NetworkUtils.removeInstances(bl.getRegularization(), WeightDecay.class); + } + if (removeWDBias) { + NetworkUtils.removeInstances(bl.getRegularizationBias(), WeightDecay.class); + } + if (gradientNormalization != null) { + bl.setGradientNormalization(gradientNormalization.orElse(null)); + } + if (gradientNormalizationThreshold != null) { + bl.setGradientNormalizationThreshold(gradientNormalizationThreshold); + } + if (updater != null) { + bl.setIUpdater(updater); + } + if (biasUpdater != null) { + bl.setBiasUpdater(biasUpdater); + } + if (weightNoise != null) { + bl.setWeightNoise(weightNoise.orElse(null)); + } + } + NeuralNetConfiguration nnc = layerConfiguration.getNetConfiguration(); + if (miniBatch != null) { + nnc.setMiniBatch(miniBatch); + } + if (maxNumLineSearchIterations != null) { + nnc.setMaxNumLineSearchIterations(maxNumLineSearchIterations); + } + if (seed != null) { + nnc.setSeed(seed); + } + if (optimizationAlgo != null) { + nnc.setOptimizationAlgo(optimizationAlgo); + } + if (stepFunction != null) { + nnc.setStepFunction(stepFunction); + } + if (minimize != null) { + nnc.setMinimize(minimize); + } + + if (convolutionMode != null && layerConfiguration instanceof ConvolutionLayer) { + ((ConvolutionLayer) layerConfiguration).setConvolutionMode(convolutionMode); + } + if (cudnnAlgoMode != null && layerConfiguration instanceof ConvolutionLayer) { + ((ConvolutionLayer) layerConfiguration).setCudnnAlgoMode(cudnnAlgoMode); + } + if (convolutionMode != null && layerConfiguration instanceof SubsamplingLayer) { + ((SubsamplingLayer) layerConfiguration).setConvolutionMode(convolutionMode); + } + + //Perform validation + if (layerConfiguration != null) { + LayerValidation.generalValidation(layerConfiguration.getLayerName(), layerConfiguration, get(dropout), regularization, + regularizationBias, + get(constraints), null, null); + } + } + + + public void applyToComputationGraphConfiguration(ComputationGraphConfiguration conf) { + if (backpropType != null) { + conf.setBackpropType(backpropType); + } + if (tbpttFwdLength != null) { + conf.setTbpttFwdLength(tbpttFwdLength); + } + if (tbpttBackLength != null) { + conf.setTbpttBackLength(tbpttBackLength); + } + } + + public NeuralNetConfiguration appliedNeuralNetConfigurationBuilder() { + NeuralNetConfiguration.NeuralNetConfigurationBuilder confBuilder = 
NeuralNetConfiguration.builder(); + + if (activationFn != null) { + confBuilder.activationFn(activationFn); + } + if (weightInitFn != null) { + confBuilder.weightInitFn(weightInitFn); + } + if (biasInit != null) { + confBuilder.biasInit(biasInit); + } + if (regularization != null) { + confBuilder.regularization(regularization); + } + if (regularizationBias != null) { + confBuilder.regularizationBias(regularizationBias); + } + if (dropout != null) { + confBuilder.idropOut(dropout.orElse(null)); + } + if (updater != null) { + confBuilder.updater(updater); + } + if (biasUpdater != null) { + confBuilder.biasUpdater(biasUpdater); + } + if (miniBatch != null) { + confBuilder.miniBatch(miniBatch); + } + if (maxNumLineSearchIterations != null) { + confBuilder.maxNumLineSearchIterations(maxNumLineSearchIterations); + } + if (seed != null) { + confBuilder.seed(seed); + } + if (optimizationAlgo != null) { + confBuilder.optimizationAlgo(optimizationAlgo); + } + if (stepFunction != null) { + confBuilder.stepFunction(stepFunction); + } + if (minimize != null) { + confBuilder.minimize(minimize); + } + if (gradientNormalization != null) { + confBuilder.gradientNormalization(gradientNormalization.orElse(null)); + } + if (gradientNormalizationThreshold != null) { + confBuilder.gradientNormalizationThreshold(gradientNormalizationThreshold); + } + if (trainingWorkspaceMode != null) { + confBuilder.trainingWorkspaceMode(trainingWorkspaceMode); + } + if (inferenceWorkspaceMode != null) { + confBuilder.inferenceWorkspaceMode(inferenceWorkspaceMode); + } + return confBuilder.build(); + } + + public String toJson() { + try { + return NeuralNetConfiguration.mapper().writeValueAsString(this); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } + + public String toYaml() { + try { + return NeuralNetConfiguration.mapperYaml().writeValueAsString(this); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } + + /* + * Can't use Lombok @Builder annotation due to optionals (otherwise we have a bunch of ugly .x(Optional value) + * methods - lombok builder doesn't support excluding fields? :( + * Note the use of optional here: gives us 3 states... + * 1. Null: not set + * 2. Optional (empty): set to null + * 3. Optional (not empty): set to specific value + * + * Obviously, having null only makes sense for some things (dropout, etc) whereas null for other things doesn't + * make sense + */ + @ToString + public static class Builder { + + protected List regularization = new ArrayList<>(); + protected List regularizationBias = new ArrayList<>(); protected boolean removeL2 = false; //For: .l2(0.0) -> user means "no l2" so we should remove it if it is present in the original model... 
protected boolean removeL2Bias = false; protected boolean removeL1 = false; protected boolean removeL1Bias = false; protected boolean removeWD = false; protected boolean removeWDBias = false; - protected Optional dropout; - protected Optional weightNoise; - protected IUpdater updater; - protected IUpdater biasUpdater; - protected Boolean miniBatch; - protected Integer maxNumLineSearchIterations; - protected Long seed; - protected OptimizationAlgorithm optimizationAlgo; - protected StepFunction stepFunction; - protected Boolean minimize; - protected Optional gradientNormalization; - protected Double gradientNormalizationThreshold; - protected ConvolutionMode convolutionMode; - protected ConvolutionLayer.AlgoMode cudnnAlgoMode; - protected Optional> constraints; + private IActivation activation; + private IWeightInit weightInitFn; + private Double biasInit; + private Optional dropout; + private Optional weightNoise; + private IUpdater updater; + private IUpdater biasUpdater; + private Boolean miniBatch; + private Integer maxNumLineSearchIterations; + private Long seed; + private OptimizationAlgorithm optimizationAlgo; + private StepFunction stepFunction; + private Boolean minimize; + private Optional gradientNormalization; + private Double gradientNormalizationThreshold; + private ConvolutionMode convolutionMode; + private ConvolutionLayer.AlgoMode cudnnAlgoMode; + private Optional> constraints; + private Boolean pretrain; + private Boolean backprop; + private BackpropType backpropType; + private Integer tbpttFwdLength; + private Integer tbpttBackLength; + private WorkspaceMode trainingWorkspaceMode; + private WorkspaceMode inferenceWorkspaceMode; - protected Boolean pretrain; - protected Boolean backprop; - protected BackpropType backpropType; - protected Integer tbpttFwdLength; - protected Integer tbpttBackLength; + public Builder() { - protected WorkspaceMode trainingWorkspaceMode; - protected WorkspaceMode inferenceWorkspaceMode; - - public static Builder builder() { - return new Builder(); } - /* - * Can't use Lombok @Builder annotation due to optionals (otherwise we have a bunch of ugly .x(Optional value) - * methods - lombok builder doesn't support excluding fields? :( - * Note the use of optional here: gives us 3 states... - * 1. Null: not set - * 2. Optional (empty): set to null - * 3. Optional (not empty): set to specific value - * - * Obviously, having null only makes sense for some things (dropout, etc) whereas null for other things doesn't - * make sense + /** + * Activation function / neuron non-linearity */ - @ToString - public static class Builder { - private IActivation activation; - private IWeightInit weightInitFn; - private Double biasInit; - protected List regularization = new ArrayList<>(); - protected List regularizationBias = new ArrayList<>(); - protected boolean removeL2 = false; //For: .l2(0.0) -> user means "no l2" so we should remove it if it is present in the original model... 
- protected boolean removeL2Bias = false; - protected boolean removeL1 = false; - protected boolean removeL1Bias = false; - protected boolean removeWD = false; - protected boolean removeWDBias = false; - private Optional dropout; - private Optional weightNoise; - private IUpdater updater; - private IUpdater biasUpdater; - private Boolean miniBatch; - private Integer maxNumLineSearchIterations; - private Long seed; - private OptimizationAlgorithm optimizationAlgo; - private StepFunction stepFunction; - private Boolean minimize; - private Optional gradientNormalization; - private Double gradientNormalizationThreshold; - private ConvolutionMode convolutionMode; - private ConvolutionLayer.AlgoMode cudnnAlgoMode; - private Optional> constraints; - private Boolean pretrain; - private Boolean backprop; - private BackpropType backpropType; - private Integer tbpttFwdLength; - private Integer tbpttBackLength; - private WorkspaceMode trainingWorkspaceMode; - private WorkspaceMode inferenceWorkspaceMode; + public Builder activation(IActivation activationFn) { + this.activation = activationFn; + return this; + } - public Builder() { + /** + * Activation function / neuron non-linearity + */ + public Builder activation(Activation activation) { + this.activation = activation.getActivationFunction(); + return this; + } - } + /** + * Weight initialization scheme to use, for initial weight values + * + * @see IWeightInit + */ + public Builder weightInit(IWeightInit weightInit) { + this.weightInitFn = weightInit; + return this; + } - /** - * Activation function / neuron non-linearity - */ - public Builder activation(IActivation activationFn) { - this.activation = activationFn; - return this; - } + /** + * Weight initialization scheme to use, for initial weight values + * + * @see WeightInit + */ + public Builder weightInit(WeightInit weightInit) { + if (weightInit == WeightInit.DISTRIBUTION) { + throw new UnsupportedOperationException( + "Not supported!, User weightInit(Distribution distribution) instead!"); + } - /** - * Activation function / neuron non-linearity - */ - public Builder activation(Activation activation) { - this.activation = activation.getActivationFunction(); - return this; - } - - /** - * Weight initialization scheme to use, for initial weight values - * - * @see IWeightInit - */ - public Builder weightInit(IWeightInit weightInit) { - this.weightInitFn = weightInit; - return this; - } - - /** - * Weight initialization scheme to use, for initial weight values - * - * @see WeightInit - */ - public Builder weightInit(WeightInit weightInit) { - if(weightInit == WeightInit.DISTRIBUTION) { - throw new UnsupportedOperationException("Not supported!, User weightInit(Distribution distribution) instead!"); - } - - this.weightInitFn = weightInit.getWeightInitFunction(); - return this; - } - - - /** - * Set weight initialization scheme to random sampling via the specified distribution. - * Equivalent to: {@code .weightInit(new WeightInitDistribution(distribution))} - * - * @param distribution Distribution to use for weight initialization - */ - public Builder weightInit(Distribution distribution){ - return weightInit(new WeightInitDistribution(distribution)); - } - - /** - * Constant for bias initialization. Default: 0.0 - * - * @param biasInit Constant for bias initialization - */ - public Builder biasInit(double biasInit) { - this.biasInit = biasInit; - return this; - } - - /** - * Distribution to sample initial weights from. 
- * Equivalent to: {@code .weightInit(new WeightInitDistribution(distribution))} - */ - @Deprecated - public Builder dist(Distribution dist) { - return weightInit(dist); - } - - /** - * L1 regularization coefficient for the weights (excluding biases) - */ - public Builder l1(double l1) { - NetworkUtils.removeInstances(regularization, L1Regularization.class); - if(l1 > 0.0) { - regularization.add(new L1Regularization(l1)); - } - return this; - } - - /** - * L2 regularization coefficient for the weights (excluding biases)
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double,boolean)} should be preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.<br>
- */ - public Builder l2(double l2) { - NetworkUtils.removeInstances(regularization, L2Regularization.class); - if(l2 > 0.0) { - NetworkUtils.removeInstancesWithWarning(regularization, WeightDecay.class, "WeightDecay regularization removed: incompatible with added L2 regularization"); - regularization.add(new L2Regularization(l2)); - } else { - removeL2 = true; - } - return this; - } - - /** - * L1 regularization coefficient for the bias parameters - */ - public Builder l1Bias(double l1Bias) { - NetworkUtils.removeInstances(regularizationBias, L1Regularization.class); - if(l1Bias > 0.0) { - regularizationBias.add(new L1Regularization(l1Bias)); - } else { - removeL1Bias = true; - } - return this; - } - - /** - * L2 regularization coefficient for the bias parameters
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double,boolean)} should be preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.<br>
- */ - public Builder l2Bias(double l2Bias) { - NetworkUtils.removeInstances(regularizationBias, L2Regularization.class); - if(l2Bias > 0.0) { - NetworkUtils.removeInstancesWithWarning(regularizationBias, WeightDecay.class, "WeightDecay bias regularization removed: incompatible with added L2 regularization"); - regularizationBias.add(new L2Regularization(l2Bias)); - } else { - removeL2Bias = true; - } - return this; - } - - /** - * Add weight decay regularization for the network parameters (excluding biases).
- * This applies weight decay with multiplying the learning rate - see {@link WeightDecay} for more details.<br>
- * - * @param coefficient Weight decay regularization coefficient - * @see #weightDecay(double, boolean) - */ - public Builder weightDecay(double coefficient) { - return weightDecay(coefficient, true); - } - - /** - * Add weight decay regularization for the network parameters (excluding biases). See {@link WeightDecay} for more details.<br>
- * - * @param coefficient Weight decay regularization coefficient - * @param applyLR Whether the learning rate should be multiplied in when performing weight decay updates. See {@link WeightDecay} for more details. - * @see #weightDecay(double, boolean) - */ - public Builder weightDecay(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularization, WeightDecay.class); - if(coefficient > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularization, L2Regularization.class, "L2 regularization removed: incompatible with added WeightDecay regularization"); - this.regularization.add(new WeightDecay(coefficient, applyLR)); - } else { - removeWD = true; - } - return this; - } - - /** - * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. - * This applies weight decay with multiplying the learning rate.
- * - * @param coefficient Weight decay regularization coefficient - * @see #weightDecayBias(double, boolean) - */ - public Builder weightDecayBias(double coefficient) { - return weightDecayBias(coefficient, true); - } - - /** - * Weight decay for the biases only - see {@link #weightDecay(double)} for more details<br>
- * - * @param coefficient Weight decay regularization coefficient - */ - public Builder weightDecayBias(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularizationBias, WeightDecay.class); - if(coefficient > 0) { - NetworkUtils.removeInstancesWithWarning(this.regularizationBias, L2Regularization.class, "L2 bias regularization removed: incompatible with added WeightDecay regularization"); - this.regularizationBias.add(new WeightDecay(coefficient, applyLR)); - } else { - removeWDBias = true; - } - return this; - } - - /** - * Set the dropout - * - * @param dropout Dropout, such as {@link Dropout}, {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout}, - * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc - */ - public Builder dropout(IDropout dropout) { - this.dropout = Optional.ofNullable(dropout); - return this; - } - - /** - * Dropout probability. This is the probability of retaining each input activation value for a layer. - * dropOut(x) will keep an input activation with probability x, and set to 0 with probability 1-x.
- * dropOut(0.0) is a special value / special case - when set to 0.0., dropout is disabled (not applied). Note - * that a dropout value of 1.0 is functionally equivalent to no dropout: i.e., 100% probability of retaining - * each input activation.<br>
- * <br>
- * Note 1: Dropout is applied at training time only - and is automatically not applied at test time - * (for evaluation, etc)<br>
- * Note 2: This sets the probability per-layer. Care should be taken when setting lower values for - * complex networks (too much information may be lost with aggressive (very low) dropout values).<br>
- * Note 3: Frequently, dropout is not applied to (or, has higher retain probability for) input (first layer) - * layers. Dropout is also often not applied to output layers. This needs to be handled MANUALLY by the user - * - set .dropout(0) on those layers when using global dropout setting.<br>
- * Note 4: Implementation detail (most users can ignore): DL4J uses inverted dropout, as described here: - * http://cs231n.github.io/neural-networks-2/ - * <br>
- * - * @param inputRetainProbability Dropout probability (probability of retaining each input activation value for a layer) - * @see #dropout(IDropout) - */ - public Builder dropOut(double inputRetainProbability){ - if(inputRetainProbability == 0.0){ - return dropout(null); - } - return dropout(new Dropout(inputRetainProbability)); - } - - /** - * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and - * {@link org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) - * - * @param weightNoise Weight noise instance to use - */ - public Builder weightNoise(IWeightNoise weightNoise) { - this.weightNoise = Optional.ofNullable(weightNoise); - return this; - } - - /** - * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam} - * or {@link org.nd4j.linalg.learning.config.Nesterovs} - * - * @param updater Updater to use - */ - public Builder updater(IUpdater updater) { - this.updater = updater; - return this; - } - - /** - * @deprecated Use {@link #updater(IUpdater)} - */ - @Deprecated - public Builder updater(Updater updater) { - return updater(updater.getIUpdaterWithDefaultConfig()); - } - - /** - * Gradient updater configuration, for the biases only. If not set, biases will use the updater as - * set by {@link #updater(IUpdater)} - * - * @param biasUpdater Updater to use for bias parameters - */ - public Builder biasUpdater(IUpdater biasUpdater) { - this.biasUpdater = biasUpdater; - return this; - } - - /** - * Whether scores and gradients should be divided by the minibatch size.
- * Most users should leave this ast he default value of true. - */ - public Builder miniBatch(boolean miniBatch) { - this.miniBatch = miniBatch; - return this; - } - - public Builder maxNumLineSearchIterations(int maxNumLineSearchIterations) { - this.maxNumLineSearchIterations = maxNumLineSearchIterations; - return this; - } - - /** - * RNG seed for reproducibility - * @param seed RNG seed to use - */ - public Builder seed(long seed) { - this.seed = seed; - return this; - } - - /** - * RNG seed for reproducibility - * @param seed RNG seed to use - */ - public Builder seed(int seed){ - return seed((long)seed); - } - - public Builder optimizationAlgo(OptimizationAlgorithm optimizationAlgo) { - this.optimizationAlgo = optimizationAlgo; - return this; - } - - public Builder stepFunction(StepFunction stepFunction) { - this.stepFunction = stepFunction; - return this; - } - - public Builder minimize(boolean minimize) { - this.minimize = minimize; - return this; - } - - /** - * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping etc. - * See {@link GradientNormalization} for details - * - * @param gradientNormalization Type of normalization to use. Defaults to None. - * @see GradientNormalization - */ - public Builder gradientNormalization(GradientNormalization gradientNormalization) { - this.gradientNormalization = Optional.ofNullable(gradientNormalization); - return this; - } - - /** - * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, - * GradientNormalization.ClipL2PerParamType, and GradientNormalization.ClipElementWiseAbsoluteValue
- * Not used otherwise.<br>
- * L2 threshold for first two types of clipping, or absolute value threshold for last type of clipping - */ - public Builder gradientNormalizationThreshold(double gradientNormalizationThreshold) { - this.gradientNormalizationThreshold = gradientNormalizationThreshold; - return this; - } - - /** - * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. - * See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE<br>
- * @param convolutionMode Convolution mode to use - */ - public Builder convolutionMode(ConvolutionMode convolutionMode) { - this.convolutionMode = convolutionMode; - return this; - } - - /** - * Sets the cuDNN algo mode for convolutional layers, which impacts performance and memory usage of cuDNN. - * See {@link ConvolutionLayer.AlgoMode} for details. Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. - */ - public Builder cudnnAlgoMode(ConvolutionLayer.AlgoMode cudnnAlgoMode) { - this.cudnnAlgoMode = cudnnAlgoMode; - return this; - } - - /** - * Set constraints to be applied to all layers. Default: no constraints.
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated. - * - * @param constraints Constraints to apply to all parameters of all layers - */ - public Builder constraints(List constraints) { - this.constraints = Optional.ofNullable(constraints); - return this; - } - - public Builder pretrain(boolean pretrain) { - this.pretrain = pretrain; - return this; - } - - public Builder backprop(boolean backprop) { - this.backprop = backprop; - return this; - } - - /** - * The type of backprop. Default setting is used for most networks (MLP, CNN etc), - * but optionally truncated BPTT can be used for training recurrent neural networks. - * If using TruncatedBPTT make sure you set both tBPTTForwardLength() and tBPTTBackwardLength() - * - * @param backpropType Type of backprop. Default: BackpropType.Standard - */ - public Builder backpropType(BackpropType backpropType) { - this.backpropType = backpropType; - return this; - } - - /** - * When doing truncated BPTT: how many steps of forward pass should we do - * before doing (truncated) backprop?
- * Only applicable when doing backpropType(BackpropType.TruncatedBPTT)<br>
- * Typically tBPTTForwardLength parameter is same as the tBPTTBackwardLength parameter, - * but may be larger than it in some circumstances (but never smaller)<br>
- * Ideally your training data time series length should be divisible by this - * This is the k1 parameter on pg23 of - * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf - * - * @param tbpttFwdLength Forward length > 0, >= backwardLength - */ - public Builder tbpttFwdLength(int tbpttFwdLength) { - this.tbpttFwdLength = tbpttFwdLength; - return this; - } - - /** - * When doing truncated BPTT: how many steps of backward should we do?<br>
- * Only applicable when doing backpropType(BackpropType.TruncatedBPTT)<br>
- * This is the k2 parameter on pg23 of - * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf - * - * @param tbpttBackLength <= forwardLength - */ - public Builder tbpttBackLength(int tbpttBackLength) { - this.tbpttBackLength = tbpttBackLength; - return this; - } - - /** - * This method defines Workspace mode being used during training: - * NONE: workspace won't be used - * ENABLED: workspaces will be used for training (reduced memory and better performance) - * - * @param trainingWorkspaceMode Workspace mode for training - * @return Builder - */ - public Builder trainingWorkspaceMode(WorkspaceMode trainingWorkspaceMode) { - this.trainingWorkspaceMode = trainingWorkspaceMode; - return this; - } - - /** - * This method defines Workspace mode being used during inference:
- * NONE: workspace won't be used<br>
- * ENABLED: workspaces will be used for inference (reduced memory and better performance) - * - * @param inferenceWorkspaceMode Workspace mode for inference - * @return Builder - */ - public Builder inferenceWorkspaceMode(WorkspaceMode inferenceWorkspaceMode) { - this.inferenceWorkspaceMode = inferenceWorkspaceMode; - return this; - } - - public FineTuneConfiguration build() { - return new FineTuneConfiguration(activation, weightInitFn, biasInit, regularization, regularizationBias, - removeL2, removeL2Bias, removeL1, removeL1Bias, removeWD, removeWDBias, dropout, - weightNoise, updater, biasUpdater, miniBatch, maxNumLineSearchIterations, seed, optimizationAlgo, stepFunction, - minimize, gradientNormalization, gradientNormalizationThreshold, convolutionMode, cudnnAlgoMode, constraints, - pretrain, backprop, backpropType, tbpttFwdLength, tbpttBackLength, trainingWorkspaceMode, inferenceWorkspaceMode); - } + this.weightInitFn = weightInit.getWeightInitFunction(); + return this; } - public NeuralNetConfiguration appliedNeuralNetConfiguration(NeuralNetConfiguration nnc) { - applyToNeuralNetConfiguration(nnc); - nnc = new NeuralNetConfiguration.Builder(nnc.clone()).build(); - return nnc; + /** + * Set weight initialization scheme to random sampling via the specified distribution. + * Equivalent to: {@code .weightInit(new WeightInitDistribution(distribution))} + * + * @param distribution Distribution to use for weight initialization + */ + public Builder weightInit(Distribution distribution) { + return weightInit(new WeightInitDistribution(distribution)); } - public void applyToNeuralNetConfiguration(NeuralNetConfiguration nnc) { - - Layer l = nnc.getLayer(); - Updater originalUpdater = null; - WeightInit origWeightInit = null; - - if (l != null) { - //As per NeuralNetConfiguration.configureLayer and LayerValidation.configureBaseLayer: only copy dropout to base layers - // this excludes things like subsampling and activation layers - if (dropout != null && l instanceof BaseLayer) { - IDropout d = dropout.orElse(null); - if(d != null) - d = d.clone(); //Clone to avoid shared state between layers - l.setIDropout(d); - } - if(constraints != null) - l.setConstraints(constraints.orElse(null)); - } - - if (l != null && l instanceof BaseLayer) { - BaseLayer bl = (BaseLayer) l; - if (activationFn != null) - bl.setActivationFn(activationFn); - if (weightInitFn != null) - bl.setWeightInitFn(weightInitFn); - if (biasInit != null) - bl.setBiasInit(biasInit); - if (regularization != null && !regularization.isEmpty()) - bl.setRegularization(regularization); - if (regularizationBias != null && !regularizationBias.isEmpty()) - bl.setRegularizationBias(regularizationBias); - if (removeL2) - NetworkUtils.removeInstances(bl.getRegularization(), L2Regularization.class); - if (removeL2Bias) - NetworkUtils.removeInstances(bl.getRegularizationBias(), L2Regularization.class); - if (removeL1) - NetworkUtils.removeInstances(bl.getRegularization(), L1Regularization.class); - if (removeL1Bias) - NetworkUtils.removeInstances(bl.getRegularizationBias(), L1Regularization.class); - if (removeWD) - NetworkUtils.removeInstances(bl.getRegularization(), WeightDecay.class); - if (removeWDBias) - NetworkUtils.removeInstances(bl.getRegularizationBias(), WeightDecay.class); - if (gradientNormalization != null) - bl.setGradientNormalization(gradientNormalization.orElse(null)); - if (gradientNormalizationThreshold != null) - bl.setGradientNormalizationThreshold(gradientNormalizationThreshold); - if (updater != null){ - 
bl.setIUpdater(updater); - } - if (biasUpdater != null){ - bl.setBiasUpdater(biasUpdater); - } - if (weightNoise != null){ - bl.setWeightNoise(weightNoise.orElse(null)); - } - } - if (miniBatch != null) - nnc.setMiniBatch(miniBatch); - if (maxNumLineSearchIterations != null) - nnc.setMaxNumLineSearchIterations(maxNumLineSearchIterations); - if (seed != null) - nnc.setSeed(seed); - if (optimizationAlgo != null) - nnc.setOptimizationAlgo(optimizationAlgo); - if (stepFunction != null) - nnc.setStepFunction(stepFunction); - if (minimize != null) - nnc.setMinimize(minimize); - - if (convolutionMode != null && l instanceof ConvolutionLayer) { - ((ConvolutionLayer) l).setConvolutionMode(convolutionMode); - } - if (cudnnAlgoMode != null && l instanceof ConvolutionLayer) { - ((ConvolutionLayer) l).setCudnnAlgoMode(cudnnAlgoMode); - } - if (convolutionMode != null && l instanceof SubsamplingLayer) { - ((SubsamplingLayer) l).setConvolutionMode(convolutionMode); - } - - //Perform validation - if (l != null) { - LayerValidation.generalValidation(l.getLayerName(), l, get(dropout), regularization, regularizationBias, - get(constraints), null, null); - } + /** + * Constant for bias initialization. Default: 0.0 + * + * @param biasInit Constant for bias initialization + */ + public Builder biasInit(double biasInit) { + this.biasInit = biasInit; + return this; } - private static T get(Optional optional){ - if(optional == null){ - return null; - } - return optional.orElse(null); + /** + * Distribution to sample initial weights from. Equivalent to: + * {@code .weightInit(new WeightInitDistribution(distribution))} + */ + @Deprecated + public Builder dist(Distribution dist) { + return weightInit(dist); } - public void applyToMultiLayerConfiguration(MultiLayerConfiguration conf) { - if (backpropType != null) - conf.setBackpropType(backpropType); - if (tbpttFwdLength != null) - conf.setTbpttFwdLength(tbpttFwdLength); - if (tbpttBackLength != null) - conf.setTbpttBackLength(tbpttBackLength); + /** + * L1 regularization coefficient for the weights (excluding biases) + */ + public Builder l1(double l1) { + NetworkUtils.removeInstances(regularization, L1Regularization.class); + if (l1 > 0.0) { + regularization.add(new L1Regularization(l1)); + } + return this; } - public void applyToComputationGraphConfiguration(ComputationGraphConfiguration conf) { - if (backpropType != null) - conf.setBackpropType(backpropType); - if (tbpttFwdLength != null) - conf.setTbpttFwdLength(tbpttFwdLength); - if (tbpttBackLength != null) - conf.setTbpttBackLength(tbpttBackLength); + /** + * L2 regularization coefficient for the weights (excluding biases)
+ * Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double, boolean)} + * should be preferred to + * L2 regularization. See {@link WeightDecay} javadoc for further details.<br>
+ */ + public Builder l2(double l2) { + NetworkUtils.removeInstances(regularization, L2Regularization.class); + if (l2 > 0.0) { + NetworkUtils.removeInstancesWithWarning(regularization, WeightDecay.class, + "WeightDecay regularization removed: incompatible with added L2 regularization"); + regularization.add(new L2Regularization(l2)); + } else { + removeL2 = true; + } + return this; } - public NeuralNetConfiguration.Builder appliedNeuralNetConfigurationBuilder() { - NeuralNetConfiguration.Builder confBuilder = new NeuralNetConfiguration.Builder(); - if (activationFn != null) - confBuilder.setActivationFn(activationFn); - if (weightInitFn != null) - confBuilder.setWeightInitFn(weightInitFn); - if (biasInit != null) - confBuilder.setBiasInit(biasInit); - if (regularization != null) - confBuilder.setRegularization(regularization); - if (regularizationBias != null) - confBuilder.setRegularizationBias(regularizationBias); - if (dropout != null) - confBuilder.setIdropOut(dropout.orElse(null)); - if (updater != null) - confBuilder.updater(updater); - if(biasUpdater != null) - confBuilder.biasUpdater(biasUpdater); - if (miniBatch != null) - confBuilder.setMiniBatch(miniBatch); - if (maxNumLineSearchIterations != null) - confBuilder.setMaxNumLineSearchIterations(maxNumLineSearchIterations); - if (seed != null) - confBuilder.setSeed(seed); - if (optimizationAlgo != null) - confBuilder.setOptimizationAlgo(optimizationAlgo); - if (stepFunction != null) - confBuilder.setStepFunction(stepFunction); - if (minimize != null) - confBuilder.setMinimize(minimize); - if (gradientNormalization != null) - confBuilder.setGradientNormalization(gradientNormalization.orElse(null)); - if (gradientNormalizationThreshold != null) - confBuilder.setGradientNormalizationThreshold(gradientNormalizationThreshold); - if (trainingWorkspaceMode != null) - confBuilder.trainingWorkspaceMode(trainingWorkspaceMode); - if (inferenceWorkspaceMode != null) - confBuilder.inferenceWorkspaceMode(inferenceWorkspaceMode); - return confBuilder; + /** + * L1 regularization coefficient for the bias parameters + */ + public Builder l1Bias(double l1Bias) { + NetworkUtils.removeInstances(regularizationBias, L1Regularization.class); + if (l1Bias > 0.0) { + regularizationBias.add(new L1Regularization(l1Bias)); + } else { + removeL1Bias = true; + } + return this; } - - public String toJson() { - try { - return NeuralNetConfiguration.mapper().writeValueAsString(this); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } + /** + * L2 regularization coefficient for the bias parameters
+ * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double, boolean)} + * should be preferred to + * L2 regularization. See {@link WeightDecay} javadoc for further details.<br>
+ */ + public Builder l2Bias(double l2Bias) { + NetworkUtils.removeInstances(regularizationBias, L2Regularization.class); + if (l2Bias > 0.0) { + NetworkUtils.removeInstancesWithWarning(regularizationBias, WeightDecay.class, + "WeightDecay bias regularization removed: incompatible with added L2 regularization"); + regularizationBias.add(new L2Regularization(l2Bias)); + } else { + removeL2Bias = true; + } + return this; } - public String toYaml() { - try { - return NeuralNetConfiguration.mapperYaml().writeValueAsString(this); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } + /** + * Add weight decay regularization for the network parameters (excluding biases).
This + * applies weight decay with multiplying the learning rate - see {@link WeightDecay} for + * more details.<br>
+ * + * @param coefficient Weight decay regularization coefficient + * @see #weightDecay(double, boolean) + */ + public Builder weightDecay(double coefficient) { + return weightDecay(coefficient, true); + } + + /** + * Add weight decay regularization for the network parameters (excluding biases). See + * {@link WeightDecay} for more details.<br>
+ * + * @param coefficient Weight decay regularization coefficient + * @param applyLR Whether the learning rate should be multiplied in when performing weight + * decay updates. See {@link WeightDecay} for more details. + * @see #weightDecay(double, boolean) + */ + public Builder weightDecay(double coefficient, boolean applyLR) { + //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both + NetworkUtils.removeInstances(this.regularization, WeightDecay.class); + if (coefficient > 0.0) { + NetworkUtils.removeInstancesWithWarning(this.regularization, L2Regularization.class, + "L2 regularization removed: incompatible with added WeightDecay regularization"); + this.regularization.add(new WeightDecay(coefficient, applyLR)); + } else { + removeWD = true; + } + return this; } - public static FineTuneConfiguration fromYaml(String yaml) { - try { - return NeuralNetConfiguration.mapperYaml().readValue(yaml, FineTuneConfiguration.class); - } catch (IOException e) { - throw new RuntimeException(e); - } + /** + * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. This + * applies weight decay with multiplying the learning rate.
+ * + * @param coefficient Weight decay regularization coefficient + * @see #weightDecayBias(double, boolean) + */ + public Builder weightDecayBias(double coefficient) { + return weightDecayBias(coefficient, true); + } + + /** + * Weight decay for the biases only - see {@link #weightDecay(double)} for more details<br>
+ * + * @param coefficient Weight decay regularization coefficient + */ + public Builder weightDecayBias(double coefficient, boolean applyLR) { + //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both + NetworkUtils.removeInstances(this.regularizationBias, WeightDecay.class); + if (coefficient > 0) { + NetworkUtils.removeInstancesWithWarning(this.regularizationBias, L2Regularization.class, + "L2 bias regularization removed: incompatible with added WeightDecay regularization"); + this.regularizationBias.add(new WeightDecay(coefficient, applyLR)); + } else { + removeWDBias = true; + } + return this; + } + + /** + * Set the dropout + * + * @param dropout Dropout, such as {@link Dropout}, + * {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout}, + * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc + */ + public Builder dropout(IDropout dropout) { + this.dropout = Optional.ofNullable(dropout); + return this; + } + + /** + * Dropout probability. This is the probability of retaining each input activation + * value for a layer. dropOut(x) will keep an input activation with probability x, and set to 0 + * with probability 1-x.
dropOut(0.0) is a special value / special case - when set to 0.0., + * dropout is disabled (not applied). Note that a dropout value of 1.0 is functionally + * equivalent to no dropout: i.e., 100% probability of retaining each input activation.<br>
+ * <br>
+ * Note 1: Dropout is applied at training time only - and is automatically not applied at test + * time (for evaluation, etc)<br>
Note 2: This sets the probability per-layer. Care should be + * taken when setting lower values for complex networks (too much information may be lost with + * aggressive (very low) dropout values).<br>
Note 3: Frequently, dropout is not applied to (or, + * has higher retain probability for) input (first layer) layers. Dropout is also often not + * applied to output layers. This needs to be handled MANUALLY by the user - set .dropout(0) on + * those layers when using global dropout setting.<br>
Note 4: Implementation detail (most users + * can ignore): DL4J uses inverted dropout, as described here: + * http://cs231n.github.io/neural-networks-2/ + * <br>
+ * + * @param inputRetainProbability Dropout probability (probability of retaining each input + * activation value for a layer) + * @see #dropout(IDropout) + */ + public Builder dropOut(double inputRetainProbability) { + if (inputRetainProbability == 0.0) { + return dropout(null); + } + return dropout(new Dropout(inputRetainProbability)); + } + + /** + * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and + * {@link org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) + * + * @param weightNoise Weight noise instance to use + */ + public Builder weightNoise(IWeightNoise weightNoise) { + this.weightNoise = Optional.ofNullable(weightNoise); + return this; + } + + /** + * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam} or + * {@link org.nd4j.linalg.learning.config.Nesterovs} + * + * @param updater Updater to use + */ + public Builder updater(IUpdater updater) { + this.updater = updater; + return this; + } + + /** + * @deprecated Use {@link #updater(IUpdater)} + */ + @Deprecated + public Builder updater(Updater updater) { + return updater(updater.getIUpdaterWithDefaultConfig()); + } + + /** + * Gradient updater configuration, for the biases only. If not set, biases will use the updater + * as set by {@link #updater(IUpdater)} + * + * @param biasUpdater Updater to use for bias parameters + */ + public Builder biasUpdater(IUpdater biasUpdater) { + this.biasUpdater = biasUpdater; + return this; + } + + /** + * Whether scores and gradients should be divided by the minibatch size.
Most users should + * leave this as the default value of true. + */ + public Builder miniBatch(boolean miniBatch) { + this.miniBatch = miniBatch; + return this; + } + + public Builder maxNumLineSearchIterations(int maxNumLineSearchIterations) { + this.maxNumLineSearchIterations = maxNumLineSearchIterations; + return this; + } + + /** + * RNG seed for reproducibility + * + * @param seed RNG seed to use + */ + public Builder seed(long seed) { + this.seed = seed; + return this; + } + + /** + * RNG seed for reproducibility + * + * @param seed RNG seed to use + */ + public Builder seed(int seed) { + return seed((long) seed); + } + + public Builder optimizationAlgo(OptimizationAlgorithm optimizationAlgo) { + this.optimizationAlgo = optimizationAlgo; + return this; + } + + public Builder stepFunction(StepFunction stepFunction) { + this.stepFunction = stepFunction; + return this; + } + + public Builder minimize(boolean minimize) { + this.minimize = minimize; + return this; + } + + /** + * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping + * etc. See {@link GradientNormalization} for details + * + * @param gradientNormalization Type of normalization to use. Defaults to None. + * @see GradientNormalization + */ + public Builder gradientNormalization(GradientNormalization gradientNormalization) { + this.gradientNormalization = Optional.ofNullable(gradientNormalization); + return this; + } + + /** + * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, + * GradientNormalization.ClipL2PerParamType, and + * GradientNormalization.ClipElementWiseAbsoluteValue<br>
Not used otherwise.<br>
L2 threshold + * for first two types of clipping, or absolute value threshold for last type of clipping + */ + public Builder gradientNormalizationThreshold(double gradientNormalizationThreshold) { + this.gradientNormalizationThreshold = gradientNormalizationThreshold; + return this; + } + + /** + * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. + * See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE<br>
+ * + * @param convolutionMode Convolution mode to use + */ + public Builder convolutionMode(ConvolutionMode convolutionMode) { + this.convolutionMode = convolutionMode; + return this; + } + + /** + * Sets the cuDNN algo mode for convolutional layers, which impacts performance and memory usage + * of cuDNN. See {@link ConvolutionLayer.AlgoMode} for details. Defaults to "PREFER_FASTEST", + * but "NO_WORKSPACE" uses less memory. + */ + public Builder cudnnAlgoMode(ConvolutionLayer.AlgoMode cudnnAlgoMode) { + this.cudnnAlgoMode = cudnnAlgoMode; + return this; + } + + /** + * Set constraints to be applied to all layers. Default: no constraints.
Constraints can be + * used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been + * updated. + * + * @param constraints Constraints to apply to all parameters of all layers + */ + public Builder constraints(List constraints) { + this.constraints = Optional.ofNullable(constraints); + return this; + } + + public Builder pretrain(boolean pretrain) { + this.pretrain = pretrain; + return this; + } + + public Builder backprop(boolean backprop) { + this.backprop = backprop; + return this; + } + + /** + * The type of backprop. Default setting is used for most networks (MLP, CNN etc), but + * optionally truncated BPTT can be used for training recurrent neural networks. If using + * TruncatedBPTT make sure you set both tBPTTForwardLength() and tBPTTBackwardLength() + * + * @param backpropType Type of backprop. Default: BackpropType.Standard + */ + public Builder backpropType(BackpropType backpropType) { + this.backpropType = backpropType; + return this; + } + + /** + * When doing truncated BPTT: how many steps of forward pass should we do before doing + * (truncated) backprop?
Only applicable when doing + * backpropType(BackpropType.TruncatedBPTT)<br>
Typically tBPTTForwardLength parameter is same + * as the tBPTTBackwardLength parameter, but may be larger than it in some circumstances (but + * never smaller)<br>
Ideally your training data time series length should be divisible by this + * This is the k1 parameter on pg23 of + * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf + * + * @param tbpttFwdLength Forward length > 0, >= backwardLength + */ + public Builder tbpttFwdLength(int tbpttFwdLength) { + this.tbpttFwdLength = tbpttFwdLength; + return this; + } + + /** + * When doing truncated BPTT: how many steps of backward should we do?
Only applicable when + * doing backpropType(BackpropType.TruncatedBPTT)<br>
This is the k2 parameter on pg23 of + * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf + * + * @param tbpttBackLength <= forwardLength + */ + public Builder tbpttBackLength(int tbpttBackLength) { + this.tbpttBackLength = tbpttBackLength; + return this; + } + + /** + * This method defines Workspace mode being used during training: NONE: workspace won't be used + * ENABLED: workspaces will be used for training (reduced memory and better performance) + * + * @param trainingWorkspaceMode Workspace mode for training + * @return Builder + */ + public Builder trainingWorkspaceMode(WorkspaceMode trainingWorkspaceMode) { + this.trainingWorkspaceMode = trainingWorkspaceMode; + return this; + } + + /** + * This method defines Workspace mode being used during inference:
NONE: workspace won't be + * used<br>
ENABLED: workspaces will be used for inference (reduced memory and better + * performance) + * + * @param inferenceWorkspaceMode Workspace mode for inference + * @return Builder + */ + public Builder inferenceWorkspaceMode(WorkspaceMode inferenceWorkspaceMode) { + this.inferenceWorkspaceMode = inferenceWorkspaceMode; + return this; + } + + public FineTuneConfiguration build() { + return new FineTuneConfiguration(activation, weightInitFn, biasInit, regularization, + regularizationBias, + removeL2, removeL2Bias, removeL1, removeL1Bias, removeWD, removeWDBias, dropout, + weightNoise, updater, biasUpdater, miniBatch, maxNumLineSearchIterations, seed, + optimizationAlgo, stepFunction, + minimize, gradientNormalization, gradientNormalizationThreshold, convolutionMode, + cudnnAlgoMode, constraints, + pretrain, backprop, backpropType, tbpttFwdLength, tbpttBackLength, trainingWorkspaceMode, + inferenceWorkspaceMode); + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java index b941cf636..8cc50854b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java @@ -28,7 +28,7 @@ import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.graph.LayerVertex; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.VertexIndices; import org.deeplearning4j.nn.graph.vertex.impl.FrozenVertex; @@ -51,7 +51,7 @@ import java.util.*; public class TransferLearning { public static class Builder { - private final MultiLayerConfiguration origConf; + private final NeuralNetConfiguration origConf; private final MultiLayerNetwork origModel; private MultiLayerNetwork editedModel; @@ -64,9 +64,9 @@ public class TransferLearning { new HashMap<>(); private final Map> nInEditedMap = new HashMap<>(); private final List editedParams = new ArrayList<>(); - private final List editedConfs = new ArrayList<>(); + private final List editedConfs = new ArrayList<>(); private final List appendParams = new ArrayList<>(); //these could be new arrays, and views from origParams - private final List appendConfs = new ArrayList<>(); + private final List appendConfs = new ArrayList<>(); private Map inputPreProcessors = new HashMap<>(); @@ -80,8 +80,8 @@ public class TransferLearning { */ public Builder(MultiLayerNetwork origModel) { this.origModel = origModel; - this.origConf = origModel.getLayerWiseConfigurations().clone(); - this.dataType = origModel.getLayerWiseConfigurations().getDataType(); + this.origConf = origModel.getNetConfiguration().clone(); + this.dataType = origModel.getNetConfiguration().getDataType(); this.inputPreProcessors = origConf.getInputPreProcessors(); } @@ -299,31 +299,31 @@ public class TransferLearning { * At the very least an outputLayer must be added (output layer should be added last - as per the note on order) * Learning configs (like updaters, learning rate etc) specified with the layer here will be honored * - * @param layer layer conf to add (similar to the NeuralNetConfiguration .list().layer(...) 
+ * @param layerConf layer conf to add (similar to the NeuralNetConfiguration .list().layer(...) * @return Builder */ - public Builder addLayer(Layer layer) { + public Builder addLayer(LayerConfiguration layerConf) { if (!prepDone) { doPrep(); } - // Use the fineTune config to create the required NeuralNetConfiguration + Layer instances + // Use the fineTune config to create the required NeuralNetConfiguration + LayerConfiguration instances //instantiate dummy layer to get the params //Build a nn config builder with settings from finetune. Set layer with the added layer //Issue: fine tune config has .learningRate(x), then I add a layer with .learningRate(y)... //We don't want that to be overridden - NeuralNetConfiguration layerConf = - finetuneConfiguration.appliedNeuralNetConfigurationBuilder().layer(layer).build(); + NeuralNetConfiguration netConf = + finetuneConfiguration.appliedNeuralNetConfigurationBuilder(); - val numParams = layer.initializer().numParams(layerConf); + val numParams = layerConf.initializer().numParams(layerConf); INDArray params; if (numParams > 0) { - params = Nd4j.create(origModel.getLayerWiseConfigurations().getDataType(), 1, numParams); - org.deeplearning4j.nn.api.Layer someLayer = layer.instantiate(layerConf, null, 0, params, true, dataType); + params = Nd4j.create(origModel.getNetConfiguration().getDataType(), 1, numParams); + org.deeplearning4j.nn.api.Layer someLayer = layerConf.instantiate(layerConf.getNetConfiguration(), null, 0, params, true, dataType); appendParams.add(someLayer.params()); - appendConfs.add(someLayer.conf()); + appendConfs.add(someLayer.getLayerConfiguration()); } else { appendConfs.add(layerConf); @@ -364,27 +364,27 @@ public class TransferLearning { if (frozenTill != -1) { org.deeplearning4j.nn.api.Layer[] layers = editedModel.getLayers(); for (int i = frozenTill; i >= 0; i--) { - //Complication here: inner Layer (implementation) NeuralNetConfiguration.layer (config) should keep + //Complication here: inner LayerConfiguration (implementation) NeuralNetConfiguration.layer (config) should keep // the original layer config. 
While network NNC should have the frozen layer, for to/from JSON etc - NeuralNetConfiguration origNNC = editedModel.getLayerWiseConfigurations().getConf(i); - NeuralNetConfiguration layerNNC = origNNC.clone(); - layers[i].setConf(layerNNC); + LayerConfiguration origNNC = editedModel.getNetConfiguration().getFlattenedLayerConfigurations().get(i); + LayerConfiguration layerNNC = origNNC.clone(); + layers[i].setLayerConfiguration(layerNNC); layers[i] = new FrozenLayer(layers[i]); if (origNNC.getVariables() != null) { - List vars = origNNC.variables(true); + List vars = origNNC.getVariables(); origNNC.clearVariables(); layerNNC.clearVariables(); for (String s : vars) { - origNNC.variables(false).add(s); - layerNNC.variables(false).add(s); + origNNC.addVariable(s); + layerNNC.addVariable(s); } } - Layer origLayerConf = editedModel.getLayerWiseConfigurations().getConf(i).getLayer(); - Layer newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf); + LayerConfiguration origLayerConf = editedModel.getNetConfiguration().getFlattenedLayerConfigurations().get(i); + LayerConfiguration newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf); newLayerConf.setLayerName(origLayerConf.getLayerName()); - editedModel.getLayerWiseConfigurations().getConf(i).setLayer(newLayerConf); + editedModel.getNetConfiguration().getNetConfigurations().get(i).setLayer(newLayerConf); } editedModel.setLayers(layers); } @@ -441,15 +441,14 @@ public class TransferLearning { private void fineTuneConfigurationBuild() { - - for (int i = 0; i < origConf.getConfs().size(); i++) { - NeuralNetConfiguration layerConf; + for (int i = 0; i < origConf.getFlattenedLayerConfigurations().size(); i++) { + LayerConfiguration layerConf; if (finetuneConfiguration != null) { - NeuralNetConfiguration nnc = origConf.getConf(i).clone(); - finetuneConfiguration.applyToNeuralNetConfiguration(nnc); + LayerConfiguration nnc = origConf.getFlattenedLayerConfigurations().get(i).clone(); + finetuneConfiguration.applyToLayerConfiguration(nnc); layerConf = nnc; } else { - layerConf = origConf.getConf(i).clone(); + layerConf = origConf.getFlattenedLayerConfigurations().get(i).clone(); } editedConfs.add(layerConf); } @@ -458,16 +457,16 @@ public class TransferLearning { private void nInReplaceBuild(int layerNum, int nIn, IWeightInit init) { Preconditions.checkArgument(layerNum >= 0 && layerNum < editedConfs.size(), "Invalid layer index: must be 0 to " + "numLayers-1 = %s includive, got %s", editedConfs.size(), layerNum); - NeuralNetConfiguration layerConf = editedConfs.get(layerNum); - Layer layerImpl = layerConf.getLayer(); //not a clone need to modify nOut in place + LayerConfiguration layerConf = editedConfs.get(layerNum); + LayerConfiguration layerImpl = layerConf; //not a clone need to modify nOut in place Preconditions.checkArgument(layerImpl instanceof FeedForwardLayer, "nInReplace can only be applide on FeedForward layers;" + "got layer of type %s", layerImpl.getClass().getSimpleName()); FeedForwardLayer layerImplF = (FeedForwardLayer) layerImpl; layerImplF.setWeightInitFn(init); layerImplF.setNIn(nIn); long numParams = layerImpl.initializer().numParams(layerConf); - INDArray params = Nd4j.create(origModel.getLayerWiseConfigurations().getDataType(), 1, numParams); - org.deeplearning4j.nn.api.Layer someLayer = layerImpl.instantiate(layerConf, null, 0, params, true, dataType); + INDArray params = Nd4j.create(origModel.getNetConfiguration().getDataType(), 1, numParams); + 
org.deeplearning4j.nn.api.Layer someLayer = layerImpl.instantiate(layerConf.getNetConfiguration(), null, 0, params, true, dataType); editedParams.set(layerNum, someLayer.params()); } @@ -476,29 +475,29 @@ public class TransferLearning { Preconditions.checkArgument(layerNum >= 0 && layerNum < editedConfs.size(), "Invalid layer index: must be 0 to " + "numLayers-1 = %s includive, got %s", editedConfs.size(), layerNum); - NeuralNetConfiguration layerConf = editedConfs.get(layerNum); - Layer layerImpl = layerConf.getLayer(); //not a clone need to modify nOut in place + LayerConfiguration layerConf = editedConfs.get(layerNum); + LayerConfiguration layerImpl = layerConf; //not a clone need to modify nOut in place Preconditions.checkArgument(layerImpl instanceof FeedForwardLayer, "nOutReplace can only be applide on FeedForward layers;" + "got layer of type %s", layerImpl.getClass().getSimpleName()); FeedForwardLayer layerImplF = (FeedForwardLayer) layerImpl; layerImplF.setWeightInitFn(scheme); layerImplF.setNOut(nOut); long numParams = layerImpl.initializer().numParams(layerConf); - INDArray params = Nd4j.create(origModel.getLayerWiseConfigurations().getDataType(), 1, numParams); - org.deeplearning4j.nn.api.Layer someLayer = layerImpl.instantiate(layerConf, null, 0, params, true, dataType); + INDArray params = Nd4j.create(origModel.getNetConfiguration().getDataType(), 1, numParams); + org.deeplearning4j.nn.api.Layer someLayer = layerImpl.instantiate(layerConf.getNetConfiguration(), null, 0, params, true, dataType); editedParams.set(layerNum, someLayer.params()); if (layerNum + 1 < editedConfs.size()) { layerConf = editedConfs.get(layerNum + 1); - layerImpl = layerConf.getLayer(); //modify in place + layerImpl = layerConf; //modify in place if(layerImpl instanceof FeedForwardLayer) { layerImplF = (FeedForwardLayer) layerImpl; layerImplF.setWeightInitFn(schemeNext); layerImplF.setNIn(nOut); numParams = layerImpl.initializer().numParams(layerConf); if (numParams > 0) { - params = Nd4j.create(origModel.getLayerWiseConfigurations().getDataType(), 1, numParams); - someLayer = layerImpl.instantiate(layerConf, null, 0, params, true, dataType); + params = Nd4j.create(origModel.getNetConfiguration().getDataType(), 1, numParams); + someLayer = layerImpl.instantiate(layerConf.getNetConfiguration(), null, 0, params, true, dataType); editedParams.set(layerNum + 1, someLayer.params()); } } @@ -526,27 +525,27 @@ public class TransferLearning { } } - private MultiLayerConfiguration constructConf() { + private NeuralNetConfiguration constructConf() { //use the editedConfs list to make a new config - List allConfs = new ArrayList<>(); + List allConfs = new ArrayList<>(); allConfs.addAll(editedConfs); allConfs.addAll(appendConfs); //Set default layer names, if not set - as per NeuralNetConfiguration.ListBuilder.build() for (int i = 0; i < allConfs.size(); i++) { - if (allConfs.get(i).getLayer().getLayerName() == null) { - allConfs.get(i).getLayer().setLayerName("layer" + i); + if (allConfs.get(i).getLayerName() == null) { + allConfs.get(i).setLayerName("layer" + i); } } - MultiLayerConfiguration conf = new MultiLayerConfiguration.Builder().inputPreProcessors(inputPreProcessors) - .setInputType(this.inputType).confs(allConfs) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().inputPreProcessors(inputPreProcessors) + .inputType(this.inputType) + .layersFromList(allConfs) + .validateOutputLayerConfig(validateOutputLayerConfig == null || validateOutputLayerConfig) .dataType(origConf.getDataType()) 
.build(); - if (finetuneConfiguration != null) { - finetuneConfiguration.applyToMultiLayerConfiguration(conf); - } + return conf; } } @@ -590,10 +589,10 @@ public class TransferLearning { for (Map.Entry gv : vertices.entrySet()) { if (gv.getValue() instanceof LayerVertex) { LayerVertex lv = (LayerVertex) gv.getValue(); - NeuralNetConfiguration nnc = lv.getLayerConf().clone(); - fineTuneConfiguration.applyToNeuralNetConfiguration(nnc); + NeuralNetConfiguration nnc = lv.getNetConfiguration().clone(); + fineTuneConfiguration.applyToLayerConfiguration(lv.getLayerConfiguration()); vertices.put(gv.getKey(), new LayerVertex(nnc, lv.getPreProcessor())); - nnc.getLayer().setLayerName(gv.getKey()); + lv.getLayerConfiguration().setLayerName(gv.getKey()); } } @@ -725,14 +724,14 @@ public class TransferLearning { * @return GraphBuilder */ public GraphBuilder nInReplace(String layerName, int nIn, IWeightInit scheme) { - Preconditions.checkState(origGraph.getVertex(layerName) != null, "Layer with name %s not found", + Preconditions.checkState(origGraph.getVertex(layerName) != null, "LayerConfiguration with name %s not found", layerName); Preconditions.checkState(origGraph.getVertex(layerName).hasLayer(), "nInReplace can only be applied" + " on vertices with layers. Vertex %s does not have a layer", layerName); initBuilderIfReq(); - NeuralNetConfiguration layerConf = origGraph.getLayer(layerName).conf(); - Layer layerImpl = layerConf.getLayer().clone(); + LayerConfiguration layerConf = origGraph.getLayer(layerName).getLayerConfiguration(); + LayerConfiguration layerImpl = layerConf.clone(); Preconditions.checkState(layerImpl instanceof FeedForwardLayer, "Can only use nInReplace on FeedForward layers;" + "got layer of type %s for layer name %s", layerImpl.getClass().getSimpleName(), layerName); @@ -744,7 +743,7 @@ public class TransferLearning { if(editedVertices.contains(layerName) && editedConfigBuilder.getVertices().get(layerName) instanceof LayerVertex && nInFromNewConfig.containsKey(layerName)){ - Layer l = ((LayerVertex)editedConfigBuilder.getVertices().get(layerName)).getLayerConf().getLayer(); + LayerConfiguration l = ((LayerVertex)editedConfigBuilder.getVertices().get(layerName)).getLayerConfiguration(); if(l instanceof FeedForwardLayer){ layerImplF.setNIn(nInFromNewConfig.get(layerName)); } @@ -764,8 +763,8 @@ public class TransferLearning { if (origGraph.getVertex(layerName).hasLayer()) { - NeuralNetConfiguration layerConf = origGraph.getLayer(layerName).conf(); - Layer layerImpl = layerConf.getLayer().clone(); + LayerConfiguration layerConf = origGraph.getLayer(layerName).getLayerConfiguration(); + LayerConfiguration layerImpl = layerConf.clone(); layerImpl.resetLayerDefaultConfig(); FeedForwardLayer layerImplF = (FeedForwardLayer) layerImpl; layerImplF.setWeightInitFn(scheme); @@ -773,7 +772,7 @@ public class TransferLearning { if(editedVertices.contains(layerName) && editedConfigBuilder.getVertices().get(layerName) instanceof LayerVertex && nInFromNewConfig.containsKey(layerName)){ - Layer l = ((LayerVertex)editedConfigBuilder.getVertices().get(layerName)).getLayerConf().getLayer(); + LayerConfiguration l = ((LayerVertex)editedConfigBuilder.getVertices().get(layerName)).getLayerConfiguration(); if(l instanceof FeedForwardLayer){ layerImplF.setNIn(nInFromNewConfig.get(layerName)); } @@ -802,10 +801,10 @@ public class TransferLearning { throw new UnsupportedOperationException( "Cannot modify nOut of a layer vertex that feeds non-layer vertices. 
Use removeVertexKeepConnections followed by addVertex instead"); } - layerConf = origGraph.getLayer(fanoutVertexName).conf(); - if(!(layerConf.getLayer() instanceof FeedForwardLayer)) + layerConf = origGraph.getLayer(fanoutVertexName).getLayerConfiguration(); + if(!(layerConf instanceof FeedForwardLayer)) continue; - layerImpl = layerConf.getLayer().clone(); + layerImpl = layerConf.clone(); layerImplF = (FeedForwardLayer) layerImpl; layerImplF.setWeightInitFn(schemeNext); layerImplF.setNIn(nOut); @@ -859,7 +858,7 @@ public class TransferLearning { * @param layerInputs * @return */ - public GraphBuilder addLayer(String layerName, Layer layer, String... layerInputs) { + public GraphBuilder addLayer(String layerName, LayerConfiguration layer, String... layerInputs) { initBuilderIfReq(); editedConfigBuilder.addLayer(layerName, layer, null, layerInputs); editedVertices.add(layerName); @@ -874,7 +873,7 @@ public class TransferLearning { * @param layerInputs * @return */ - public GraphBuilder addLayer(String layerName, Layer layer, InputPreProcessor preProcessor, + public GraphBuilder addLayer(String layerName, LayerConfiguration layer, InputPreProcessor preProcessor, String... layerInputs) { initBuilderIfReq(); editedConfigBuilder.addLayer(layerName, layer, preProcessor, layerInputs); @@ -1009,24 +1008,24 @@ public class TransferLearning { String layerName = gv.getVertexName(); LayerVertex currLayerVertex = (LayerVertex) newConfig.getVertices().get(layerName); - Layer origLayerConf = currLayerVertex.getLayerConf().getLayer(); - Layer newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf); + LayerConfiguration origLayerConf = currLayerVertex.getLayerConfiguration(); + LayerConfiguration newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf); newLayerConf.setLayerName(origLayerConf.getLayerName()); - //Complication here(and reason for clone on next line): inner Layer (implementation) + //Complication here(and reason for clone on next line): inner LayerConfiguration (implementation) // NeuralNetConfiguration.layer (config) should keep the original layer config. 
While network // NNC should have the frozen layer - NeuralNetConfiguration newNNC = currLayerVertex.getLayerConf().clone(); - currLayerVertex.setLayerConf(newNNC); - currLayerVertex.getLayerConf().setLayer(newLayerConf); + NeuralNetConfiguration newNNC = currLayerVertex.getNetConfiguration().clone(); + currLayerVertex.setNetConfiguration(newNNC); + currLayerVertex.getNetConfiguration().setLayer(newLayerConf); //Make sure the underlying layer doesn't change: - List vars = currLayerVertex.getLayerConf().variables(true); - currLayerVertex.getLayerConf().clearVariables(); + List vars = currLayerVertex.getNetConfiguration().netWideVariables(true); + currLayerVertex.getNetConfiguration().clearNetWideVariable(); for (String s : vars) { - newNNC.variables(false).add(s); + newNNC.netWideVariables(false).add(s); } - //We also need to place the layer in the CompGraph Layer[] (replacing the old one) + //We also need to place the layer in the CompGraph LayerConfiguration[] (replacing the old one) //This could no doubt be done more efficiently org.deeplearning4j.nn.api.Layer[] layers = newGraph.getLayers(); for (int j = 0; j < layers.length; j++) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java index a6f7d6c4f..effc48ad4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java @@ -21,7 +21,6 @@ package org.deeplearning4j.nn.transferlearning; import org.apache.commons.lang3.ArrayUtils; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.GraphVertex; @@ -179,7 +178,7 @@ public class TransferLearningHelper { org.deeplearning4j.nn.api.Layer l = gv.getLayer(); gv.setLayerAsFrozen(); - //We also need to place the layer in the CompGraph Layer[] (replacing the old one) + //We also need to place the layer in the CompGraph LayerConfiguration[] (replacing the old one) //This could no doubt be done more efficiently org.deeplearning4j.nn.api.Layer[] layers = origGraph.getLayers(); for (int j = 0; j < layers.length; j++) { @@ -282,16 +281,16 @@ public class TransferLearningHelper { } List allConfs = new ArrayList<>(); for (int i = frozenInputLayer + 1; i < origMLN.getnLayers(); i++) { - allConfs.add(origMLN.getLayer(i).conf()); + allConfs.add(origMLN.getLayer(i).getNetConfiguration()); } - MultiLayerConfiguration c = origMLN.getLayerWiseConfigurations(); + NeuralNetConfiguration c = origMLN.getNetConfiguration(); - unFrozenSubsetMLN = new MultiLayerNetwork(new MultiLayerConfiguration.Builder() + unFrozenSubsetMLN = new MultiLayerNetwork(NeuralNetConfiguration.builder() .inputPreProcessors(c.getInputPreProcessors()) - .backpropType(c.getBackpropType()).tBPTTForwardLength(c.getTbpttFwdLength()) - .tBPTTBackwardLength(c.getTbpttBackLength()).confs(allConfs) - .dataType(origMLN.getLayerWiseConfigurations().getDataType()) + .backpropType(c.getBackpropType()).tbpttFwdLength(c.getTbpttFwdLength()) + .tbpttBackLength(c.getTbpttBackLength()).confs(allConfs) + .dataType(origMLN.getNetConfiguration().getDataType()) .build()); unFrozenSubsetMLN.init(); //copy over params diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java index 91d24de46..dfcef372c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java @@ -21,7 +21,7 @@ package org.deeplearning4j.nn.updater; import lombok.Getter; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.Trainable; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.GradientNormalization; @@ -44,7 +44,7 @@ import org.nd4j.linalg.learning.config.IUpdater; import java.util.*; @Getter -public abstract class BaseMultiLayerUpdater implements Updater { +public abstract class BaseMultiLayerUpdater implements Updater { protected final T network; protected Map layersByName; @@ -81,7 +81,7 @@ public abstract class BaseMultiLayerUpdater implements Updater int paramsViewSoFar = 0; int currentUpdaterOffset = 0; for (int i = 0; i < layers.length; i++) { - Map layerParamTable = layers[i].paramTable(false); + Map layerParamTable = layers[i].getParamTable(false); if (layerParamTable != null) { List variables = new ArrayList<>(layerParamTable.keySet()); //Is from a set, but iteration order should be fixed per layer as it's a from a LinkedHashSet for (int j = 0; j < variables.size(); j++) { @@ -351,8 +351,8 @@ public abstract class BaseMultiLayerUpdater implements Updater long currentStart = 0; long currentEnd = 0; for(Trainable t : getOrderedLayers()){ - Set layerParams = t.paramTable(false).keySet(); - Map paramTable = t.paramTable(false); + Set layerParams = t.getParamTable(false).keySet(); + Map paramTable = t.getParamTable(false); for(String s : layerParams) { if(t.updaterDivideByMinibatch(s)){ long l = paramTable.get(s).length(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java index dea50edd9..f27e7dcfa 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java @@ -42,7 +42,7 @@ public class LayerUpdater extends BaseMultiLayerUpdater { } layersByName = new HashMap<>(); - layersByName.put(layer.conf().getLayer().getLayerName(), layer); + layersByName.put(layer.getLayerConfiguration().getLayerName(), layer); } @Override @@ -62,7 +62,7 @@ public class LayerUpdater extends BaseMultiLayerUpdater { @Override protected boolean isMiniBatch() { - return network.conf().isMiniBatch(); + return network.getNetConfiguration().isMiniBatch(); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java index 58f64f66f..f43aa85d2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java @@ -71,7 +71,7 @@ public class MultiLayerUpdater extends BaseMultiLayerUpdater @Override protected boolean isMiniBatch() { - return network.conf().isMiniBatch(); + return network.getNetConfiguration().isMiniBatch(); } @Override diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java index 3194de852..14850eafb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java @@ -21,7 +21,7 @@ package org.deeplearning4j.nn.updater; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater; @@ -35,7 +35,7 @@ public class UpdaterCreator { private UpdaterCreator() {} - public static org.deeplearning4j.nn.api.Updater getUpdater(Model layer) { + public static org.deeplearning4j.nn.api.Updater getUpdater(IModel layer) { if (layer instanceof MultiLayerNetwork) { return new MultiLayerUpdater((MultiLayerNetwork) layer); } else if (layer instanceof ComputationGraph) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java index 6af2901d6..1c39f52e1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java @@ -20,7 +20,6 @@ package org.deeplearning4j.nn.updater.graph; -import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.Trainable; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.GraphVertex; @@ -90,8 +89,10 @@ public class ComputationGraphUpdater extends BaseMultiLayerUpdater listeners; - private Model model; + private IModel model; private ConvexOptimizer optimizer; private StepFunction stepFunction; @@ -90,7 +90,7 @@ public class Solver { public static class Builder { private NeuralNetConfiguration conf; - private Model model; + private IModel model; private final List listeners = new ArrayList<>(); public Builder configure(NeuralNetConfiguration conf) { @@ -112,7 +112,7 @@ public class Solver { return this; } - public Builder model(Model model) { + public Builder model(IModel model) { this.model = model; return this; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/BaseTrainingListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/BaseTrainingListener.java index c7d755187..d72b836f5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/BaseTrainingListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/BaseTrainingListener.java @@ -20,7 +20,7 @@ package org.deeplearning4j.optimize.api; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.nd4j.linalg.api.ndarray.INDArray; import java.util.List; @@ -29,43 +29,43 @@ import java.util.Map; public abstract class BaseTrainingListener implements TrainingListener { @Override - public void onEpochStart(Model model) { + public void onEpochStart(IModel model) { //No op } @Override - public void onEpochEnd(Model model) { + public void onEpochEnd(IModel model) { //No op } @Override - public void onForwardPass(Model model, List activations) { + public void 
onForwardPass(IModel model, List activations) { //No op } @Override - public void onForwardPass(Model model, Map activations) { + public void onForwardPass(IModel model, Map activations) { //No op } @Override - public void onGradientCalculation(Model model) { + public void onGradientCalculation(IModel model) { //No op } @Override - public void onBackwardPass(Model model) { + public void onBackwardPass(IModel model) { //No op } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { //No op } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java index c32a0fac3..0d6999fce 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java @@ -20,7 +20,7 @@ package org.deeplearning4j.optimize.api; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; @@ -128,6 +128,6 @@ public interface ConvexOptimizer extends Serializable { * @param batchSize batchSize for update * @paramType paramType to update */ - void updateGradientAccordingToParams(Gradient gradient, Model model, int batchSize, LayerWorkspaceMgr workspaceMgr); + void updateGradientAccordingToParams(Gradient gradient, IModel model, int batchSize, LayerWorkspaceMgr workspaceMgr); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/IterationListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/IterationListener.java index 085d734b1..309f478fe 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/IterationListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/IterationListener.java @@ -21,7 +21,7 @@ package org.deeplearning4j.optimize.api; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import java.io.Serializable; @@ -33,6 +33,6 @@ public abstract class IterationListener extends BaseTrainingListener implements * @param iteration the iteration * @param model the model iterating */ - public abstract void iterationDone(Model model, int iteration, int epoch); + public abstract void iterationDone(IModel model, int iteration, int epoch); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java index 81a2d8465..20fe978dc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java @@ -20,7 +20,7 @@ package org.deeplearning4j.optimize.api; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; @@ -35,19 +35,19 @@ public interface TrainingListener { * @param iteration the iteration * @param model the model iterating */ - void iterationDone(Model model, int iteration, int epoch); + void 
iterationDone(IModel model, int iteration, int epoch); /** * Called once at the start of each epoch, when using methods such as {@link org.deeplearning4j.nn.multilayer.MultiLayerNetwork#fit(DataSetIterator)}, * {@link org.deeplearning4j.nn.graph.ComputationGraph#fit(DataSetIterator)} or {@link org.deeplearning4j.nn.graph.ComputationGraph#fit(MultiDataSetIterator)} */ - void onEpochStart(Model model); + void onEpochStart(IModel model); /** * Called once at the end of each epoch, when using methods such as {@link org.deeplearning4j.nn.multilayer.MultiLayerNetwork#fit(DataSetIterator)}, * {@link org.deeplearning4j.nn.graph.ComputationGraph#fit(DataSetIterator)} or {@link org.deeplearning4j.nn.graph.ComputationGraph#fit(MultiDataSetIterator)} */ - void onEpochEnd(Model model); + void onEpochEnd(IModel model); /** * Called once per iteration (forward pass) for activations (usually for a {@link org.deeplearning4j.nn.multilayer.MultiLayerNetwork}), @@ -56,7 +56,7 @@ public interface TrainingListener { * @param model Model * @param activations ILayer activations (including input) */ - void onForwardPass(Model model, List activations); + void onForwardPass(IModel model, List activations); /** * Called once per iteration (forward pass) for activations (usually for a {@link org.deeplearning4j.nn.graph.ComputationGraph}), @@ -65,30 +65,30 @@ public interface TrainingListener { * @param model Model * @param activations ILayer activations (including input) */ - void onForwardPass(Model model, Map activations); + void onForwardPass(IModel model, Map activations); /** * Called once per iteration (backward pass) before the gradients are updated - * Gradients are available via {@link Model#gradient()}. + * Gradients are available via {@link IModel#gradient()}. * Note that gradients will likely be updated in-place - thus they should be copied or processed synchronously * in this method. *
- * For updates (gradients post learning rate/momentum/rmsprop etc) see {@link #onBackwardPass(Model)} + * For updates (gradients post learning rate/momentum/rmsprop etc) see {@link #onBackwardPass(IModel)} * * @param model Model */ - void onGradientCalculation(Model model); + void onGradientCalculation(IModel model); /** * Called once per iteration (backward pass) after gradients have been calculated, and updated - * Gradients are available via {@link Model#gradient()}. + * Gradients are available via {@link IModel#gradient()}. *
- * Unlike {@link #onGradientCalculation(Model)} the gradients at this point will be post-update, rather than + * Unlike {@link #onGradientCalculation(IModel)} the gradients at this point will be post-update, rather than * raw (pre-update) gradients at that method call. * * @param model Model */ - void onBackwardPass(Model model); + void onBackwardPass(IModel model); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java index 550e4425b..120099da4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java @@ -23,8 +23,8 @@ package org.deeplearning4j.optimize.listeners; import com.google.common.io.Files; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.io.IOUtils; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.api.BaseTrainingListener; @@ -109,7 +109,7 @@ public class CheckpointListener extends BaseTrainingListener implements Serializ } @Override - public void onEpochEnd(Model model) { + public void onEpochEnd(IModel model) { int epochsDone = getEpoch(model) + 1; if(saveEveryNEpochs != null && epochsDone > 0 && epochsDone % saveEveryNEpochs == 0){ //Save: @@ -119,7 +119,7 @@ public class CheckpointListener extends BaseTrainingListener implements Serializ } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { if (startTime < 0) { startTime = System.currentTimeMillis(); startIter = iteration; @@ -164,7 +164,7 @@ public class CheckpointListener extends BaseTrainingListener implements Serializ } } - private void saveCheckpoint(Model model) { + private void saveCheckpoint(IModel model) { try{ saveCheckpointHelper(model); } catch (Exception e){ @@ -172,7 +172,7 @@ public class CheckpointListener extends BaseTrainingListener implements Serializ } } - private void saveCheckpointHelper(Model model) throws Exception { + private void saveCheckpointHelper(IModel model) throws Exception { if(!checkpointRecordFile.exists()){ checkpointRecordFile.createNewFile(); write(Checkpoint.getFileHeader() + "\n", checkpointRecordFile); @@ -243,27 +243,27 @@ public class CheckpointListener extends BaseTrainingListener implements Serializ return str; } - protected static int getIter(Model model) { + protected static int getIter(IModel model) { if (model instanceof MultiLayerNetwork) { - return ((MultiLayerNetwork) model).getLayerWiseConfigurations().getIterationCount(); + return ((MultiLayerNetwork) model).getNetConfiguration().getIterationCount(); } else if (model instanceof ComputationGraph) { return ((ComputationGraph) model).getComputationGraphConfiguration().getIterationCount(); } else { - return model.conf().getIterationCount(); + return model.getNetConfiguration().getIterationCount(); } } - protected static int getEpoch(Model model) { + protected static int getEpoch(IModel model) { if (model instanceof MultiLayerNetwork) { - return ((MultiLayerNetwork) model).getLayerWiseConfigurations().getEpochCount(); + return ((MultiLayerNetwork) model).getNetConfiguration().getEpochCount(); } else if (model 
instanceof ComputationGraph) { return ((ComputationGraph) model).getComputationGraphConfiguration().getEpochCount(); } else { - return model.conf().getEpochCount(); + return model.getNetConfiguration().getEpochCount(); } } - protected static String getModelType(Model model){ + protected static String getModelType(IModel model){ if(model.getClass() == MultiLayerNetwork.class){ return "MultiLayerNetwork"; } else if(model.getClass() == ComputationGraph.class){ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java index 51f798e26..0692387cf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java @@ -20,7 +20,7 @@ package org.deeplearning4j.optimize.listeners; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.api.BaseTrainingListener; import java.io.File; @@ -132,7 +132,7 @@ public class CollectScoresIterationListener extends BaseTrainingListener { } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { if (++iterationCount % frequency == 0) { double score = model.score(); scoreVsIter.reallocateGuard(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresListener.java index 4f6d17b3c..558b3eb92 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresListener.java @@ -25,7 +25,7 @@ import it.unimi.dsi.fastutil.ints.IntArrayList; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.api.BaseTrainingListener; @@ -53,7 +53,7 @@ public class CollectScoresListener extends BaseTrainingListener implements Seria } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { if(iteration % frequency == 0){ double score = model.score(); listIteration.add(iteration); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ComposableIterationListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ComposableIterationListener.java index 3b82fc6b2..4b67fcede 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ComposableIterationListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ComposableIterationListener.java @@ -20,7 +20,7 @@ package org.deeplearning4j.optimize.listeners; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.api.BaseTrainingListener; import org.deeplearning4j.optimize.api.TrainingListener; @@ -42,7 +42,7 @@ public class ComposableIterationListener extends BaseTrainingListener implements } @Override - public void iterationDone(Model model, 
int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { for (TrainingListener listener : listeners) listener.iterationDone(model, iteration, epoch); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/EvaluativeListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/EvaluativeListener.java index a05d14a87..f98dd0aad 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/EvaluativeListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/EvaluativeListener.java @@ -24,8 +24,8 @@ import lombok.Getter; import lombok.NonNull; import lombok.Setter; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.exception.DL4JInvalidInputException; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.api.BaseTrainingListener; @@ -39,8 +39,6 @@ import org.nd4j.linalg.dataset.MultiDataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -import java.util.List; -import java.util.Map; import java.util.concurrent.atomic.AtomicLong; @Slf4j @@ -193,24 +191,24 @@ public class EvaluativeListener extends BaseTrainingListener { * @param iteration the iteration */ @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { if (invocationType == InvocationType.ITERATION_END) invokeListener(model); } @Override - public void onEpochStart(Model model) { + public void onEpochStart(IModel model) { if (invocationType == InvocationType.EPOCH_START) invokeListener(model); } @Override - public void onEpochEnd(Model model) { + public void onEpochEnd(IModel model) { if (invocationType == InvocationType.EPOCH_END) invokeListener(model); } - protected void invokeListener(Model model) { + protected void invokeListener(IModel model) { if (iterationCount.get() == null) iterationCount.set(new AtomicLong(0)); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/FailureTestingListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/FailureTestingListener.java index c05626511..d6ac11b41 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/FailureTestingListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/FailureTestingListener.java @@ -25,7 +25,7 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.api.TrainingListener; @@ -51,41 +51,41 @@ public class FailureTestingListener implements TrainingListener, Serializable { } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { call(CallType.ITER_DONE, model); } @Override - public void onEpochStart(Model model) { + public void onEpochStart(IModel model) { call(CallType.EPOCH_START, model); } @Override - public void onEpochEnd(Model model) { + public void 
onEpochEnd(IModel model) { call(CallType.EPOCH_END, model); } @Override - public void onForwardPass(Model model, List activations) { + public void onForwardPass(IModel model, List activations) { call(CallType.FORWARD_PASS, model); } @Override - public void onForwardPass(Model model, Map activations) { + public void onForwardPass(IModel model, Map activations) { call(CallType.FORWARD_PASS, model); } @Override - public void onGradientCalculation(Model model) { + public void onGradientCalculation(IModel model) { call(CallType.GRADIENT_CALC, model); } @Override - public void onBackwardPass(Model model) { + public void onBackwardPass(IModel model) { call(CallType.BACKWARD_PASS, model); } - protected void call(CallType callType, Model model){ + protected void call(CallType callType, IModel model){ if(!trigger.initialized()){ trigger.initialize(); } @@ -149,7 +149,7 @@ public class FailureTestingListener implements TrainingListener, Serializable { * @param model Model * @return */ - public abstract boolean triggerFailure(CallType callType, int iteration, int epoch, Model model); + public abstract boolean triggerFailure(CallType callType, int iteration, int epoch, IModel model); public boolean initialized(){ return initialized; @@ -170,7 +170,7 @@ public class FailureTestingListener implements TrainingListener, Serializable { } @Override - public boolean triggerFailure(CallType callType, int iteration, int epoch, Model model) { + public boolean triggerFailure(CallType callType, int iteration, int epoch, IModel model) { boolean b = true; for(FailureTrigger ft : triggers) b &= ft.triggerFailure(callType, iteration, epoch, model); @@ -191,7 +191,7 @@ public class FailureTestingListener implements TrainingListener, Serializable { } @Override - public boolean triggerFailure(CallType callType, int iteration, int epoch, Model model) { + public boolean triggerFailure(CallType callType, int iteration, int epoch, IModel model) { boolean b = false; for(FailureTrigger ft : triggers) b |= ft.triggerFailure(callType, iteration, epoch, model); @@ -213,7 +213,7 @@ public class FailureTestingListener implements TrainingListener, Serializable { } @Override - public boolean triggerFailure(CallType callType, int iteration, int epoch, Model model) { + public boolean triggerFailure(CallType callType, int iteration, int epoch, IModel model) { return (this.callType == CallType.ANY || callType == this.callType) && rng.nextDouble() < probability; } @@ -237,7 +237,7 @@ public class FailureTestingListener implements TrainingListener, Serializable { } @Override - public boolean triggerFailure(CallType callType, int iteration, int epoch, Model model) { + public boolean triggerFailure(CallType callType, int iteration, int epoch, IModel model) { return (System.currentTimeMillis() - initTime) > msSinceInit; } @@ -260,7 +260,7 @@ public class FailureTestingListener implements TrainingListener, Serializable { @Override - public boolean triggerFailure(CallType callType, int iteration, int epoch, Model model) { + public boolean triggerFailure(CallType callType, int iteration, int epoch, IModel model) { return shouldFail; } @@ -284,7 +284,7 @@ public class FailureTestingListener implements TrainingListener, Serializable { @Override - public boolean triggerFailure(CallType callType, int iteration, int epoch, Model model) { + public boolean triggerFailure(CallType callType, int iteration, int epoch, IModel model) { return shouldFail; } @@ -314,7 +314,7 @@ public class FailureTestingListener implements TrainingListener, Serializable { } 
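// ---------------------------------------------------------------------------
// Illustrative sketch only (not part of the patch): a minimal custom listener
// written against the IModel-based listener API introduced in the hunks above.
// It assumes only what those hunks show: the IModel parameter type, the
// iterationDone/onEpochEnd signatures on BaseTrainingListener, and
// IModel.score() as used by CollectScoresIterationListener. The class name and
// the log format are hypothetical.
// ---------------------------------------------------------------------------
import lombok.extern.slf4j.Slf4j;
import net.brutex.ai.dnn.api.IModel;
import org.deeplearning4j.optimize.api.BaseTrainingListener;

@Slf4j
public class ScoreLoggingListener extends BaseTrainingListener {

    private final int frequency;

    public ScoreLoggingListener(int frequency) {
        this.frequency = frequency;
    }

    @Override
    public void iterationDone(IModel model, int iteration, int epoch) {
        // IModel replaces the former org.deeplearning4j.nn.api.Model parameter.
        if (frequency > 0 && iteration % frequency == 0) {
            log.info("epoch {} iteration {} score {}", epoch, iteration, model.score());
        }
    }

    @Override
    public void onEpochEnd(IModel model) {
        log.info("epoch complete, score {}", model.score());
    }
}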
@Override - public boolean triggerFailure(CallType callType, int iteration, int epoch, Model model) { + public boolean triggerFailure(CallType callType, int iteration, int epoch, IModel model) { return (isEpoch && epoch == count) || (!isEpoch && iteration == count); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/PerformanceListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/PerformanceListener.java index 68402f40e..ff76fbfc0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/PerformanceListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/PerformanceListener.java @@ -22,14 +22,12 @@ package org.deeplearning4j.optimize.listeners; import com.google.common.base.Preconditions; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.api.BaseTrainingListener; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.ObjectInputStream; @@ -78,7 +76,7 @@ public class PerformanceListener extends BaseTrainingListener implements Seriali } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { // we update lastTime on every iteration // just to simplify things if (lastTime.get() == null) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreIterationListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreIterationListener.java index 6568d2f67..2d8cc1829 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreIterationListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreIterationListener.java @@ -21,10 +21,8 @@ package org.deeplearning4j.optimize.listeners; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.api.BaseTrainingListener; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.Serializable; @@ -43,7 +41,7 @@ public class ScoreIterationListener extends BaseTrainingListener implements Seri public ScoreIterationListener() {} @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { if (printIterations <= 0) printIterations = 1; if (iteration % printIterations == 0) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreToChartListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreToChartListener.java index 2fc2999d6..1a8620a48 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreToChartListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreToChartListener.java @@ -26,7 +26,7 @@ import lombok.extern.slf4j.Slf4j; import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.Response; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import 
org.deeplearning4j.optimize.api.BaseTrainingListener; @Slf4j @@ -40,7 +40,7 @@ public class ScoreToChartListener extends BaseTrainingListener { } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { double score = model.score(); String nurl = url+"s="+score+"&n="+seriesName; OkHttpClient client = new OkHttpClient(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/SleepyTrainingListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/SleepyTrainingListener.java index 4c262a64c..834778001 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/SleepyTrainingListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/SleepyTrainingListener.java @@ -22,7 +22,7 @@ package org.deeplearning4j.optimize.listeners; import lombok.*; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.api.BaseTrainingListener; import org.nd4j.common.util.ThreadUtils; import org.nd4j.linalg.api.ndarray.INDArray; @@ -160,7 +160,7 @@ public class SleepyTrainingListener extends BaseTrainingListener implements Seri } @Override - public void onEpochStart(Model model) { + public void onEpochStart(IModel model) { sleep(lastES.get(), timerES); if (lastES.get() == null) @@ -170,7 +170,7 @@ public class SleepyTrainingListener extends BaseTrainingListener implements Seri } @Override - public void onEpochEnd(Model model) { + public void onEpochEnd(IModel model) { sleep(lastEE.get(), timerEE); if (lastEE.get() == null) @@ -180,7 +180,7 @@ public class SleepyTrainingListener extends BaseTrainingListener implements Seri } @Override - public void onForwardPass(Model model, List activations) { + public void onForwardPass(IModel model, List activations) { sleep(lastFF.get(), timerFF); if (lastFF.get() == null) @@ -190,7 +190,7 @@ public class SleepyTrainingListener extends BaseTrainingListener implements Seri } @Override - public void onForwardPass(Model model, Map activations) { + public void onForwardPass(IModel model, Map activations) { sleep(lastFF.get(), timerFF); if (lastFF.get() == null) @@ -200,7 +200,7 @@ public class SleepyTrainingListener extends BaseTrainingListener implements Seri } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { sleep(lastIteration.get(), timerIteration); if (lastIteration.get() == null) @@ -210,7 +210,7 @@ public class SleepyTrainingListener extends BaseTrainingListener implements Seri } @Override - public void onBackwardPass(Model model) { + public void onBackwardPass(IModel model) { sleep(lastBP.get(), timerBP); if (lastBP.get() == null) @@ -220,7 +220,7 @@ public class SleepyTrainingListener extends BaseTrainingListener implements Seri } @Override - public void onGradientCalculation(Model model) { + public void onGradientCalculation(IModel model) { // } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/TimeIterationListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/TimeIterationListener.java index cc48c216b..8a947e4c0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/TimeIterationListener.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/TimeIterationListener.java @@ -21,10 +21,8 @@ package org.deeplearning4j.optimize.listeners; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.api.BaseTrainingListener; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.Serializable; import java.util.Date; @@ -46,7 +44,7 @@ public class TimeIterationListener extends BaseTrainingListener implements Seria } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { long currentIteration = iterationCounter.incrementAndGet(); long elapsed = System.currentTimeMillis() - start; long remaining = (iterationCount - currentIteration) * elapsed / currentIteration; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/EvaluationCallback.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/EvaluationCallback.java index 6cb756bb8..5f14ef3b0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/EvaluationCallback.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/EvaluationCallback.java @@ -20,11 +20,11 @@ package org.deeplearning4j.optimize.listeners.callbacks; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.listeners.EvaluativeListener; import org.nd4j.evaluation.IEvaluation; public interface EvaluationCallback { - void call(EvaluativeListener listener, Model model, long invocationsCount, IEvaluation[] evaluations); + void call(EvaluativeListener listener, IModel model, long invocationsCount, IEvaluation[] evaluations); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/ModelSavingCallback.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/ModelSavingCallback.java index df46f1fc6..cb0fe44a0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/ModelSavingCallback.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/ModelSavingCallback.java @@ -23,7 +23,7 @@ package org.deeplearning4j.optimize.listeners.callbacks; import lombok.NonNull; import org.apache.commons.io.FilenameUtils; import org.deeplearning4j.exception.DL4JInvalidConfigException; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.listeners.EvaluativeListener; import org.deeplearning4j.util.ModelSerializer; import org.nd4j.evaluation.IEvaluation; @@ -66,7 +66,7 @@ public class ModelSavingCallback implements EvaluationCallback { } @Override - public void call(EvaluativeListener listener, Model model, long invocationsCount, IEvaluation[] evaluations) { + public void call(EvaluativeListener listener, IModel model, long invocationsCount, IEvaluation[] evaluations) { String temp = template.replaceAll("%d", "" + invocationsCount); @@ -81,7 +81,7 @@ public class ModelSavingCallback implements EvaluationCallback { * @param model * @param filename */ - protected void save(Model model, String filename) { + protected void save(IModel model, String filename) { try { ModelSerializer.writeModel(model, filename, true); } catch (IOException e) { diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BackTrackLineSearch.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BackTrackLineSearch.java index 18e64c081..1f391a3ef 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BackTrackLineSearch.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BackTrackLineSearch.java @@ -20,9 +20,9 @@ package org.deeplearning4j.optimize.solvers; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.math3.util.FastMath; import org.deeplearning4j.exception.InvalidStepException; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.stepfunctions.NegativeGradientStepFunction; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.LineOptimizer; @@ -33,7 +33,6 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.scalar.comparison.ScalarSetValue; import org.nd4j.linalg.api.ops.impl.transforms.comparison.Eps; -import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.slf4j.Logger; @@ -44,7 +43,7 @@ import static org.nd4j.linalg.ops.transforms.Transforms.abs; public class BackTrackLineSearch implements LineOptimizer { private static final Logger log = LoggerFactory.getLogger(BackTrackLineSearch.class); - private final Model layer; + private final IModel layer; private final StepFunction stepFunction; private final ConvexOptimizer optimizer; private int maxIterations; @@ -64,18 +63,18 @@ public class BackTrackLineSearch implements LineOptimizer { * @param stepFunction * @param optimizer */ - public BackTrackLineSearch(Model layer, StepFunction stepFunction, ConvexOptimizer optimizer) { + public BackTrackLineSearch(IModel layer, StepFunction stepFunction, ConvexOptimizer optimizer) { this.layer = layer; this.stepFunction = stepFunction; this.optimizer = optimizer; - this.maxIterations = layer.conf().getMaxNumLineSearchIterations(); + this.maxIterations = layer.getNetConfiguration().getMaxNumLineSearchIterations(); } /** * @param optimizable * @param optimizer */ - public BackTrackLineSearch(Model optimizable, ConvexOptimizer optimizer) { + public BackTrackLineSearch(IModel optimizable, ConvexOptimizer optimizer) { this(optimizable, new NegativeDefaultStepFunction(), optimizer); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java index 42ce490e5..b5e06a3c3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java @@ -21,12 +21,11 @@ package org.deeplearning4j.optimize.solvers; import lombok.Getter; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.exception.InvalidStepException; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -64,7 +63,7 @@ public abstract 
class BaseOptimizer implements ConvexOptimizer { @Getter protected StepFunction stepFunction; protected Collection trainingListeners = new ArrayList<>(); - protected Model model; + protected IModel model; protected BackTrackLineSearch lineMaximizer; protected Updater updater; protected ComputationGraphUpdater computationGraphUpdater; @@ -90,7 +89,7 @@ public abstract class BaseOptimizer implements ConvexOptimizer { * @param model */ public BaseOptimizer(NeuralNetConfiguration conf, StepFunction stepFunction, - Collection trainingListeners, Model model) { + Collection trainingListeners, IModel model) { this.conf = conf; this.stepFunction = (stepFunction != null ? stepFunction : getDefaultStepFunctionForOptimizer(this.getClass())); this.trainingListeners = trainingListeners != null ? trainingListeners : new ArrayList(); @@ -289,7 +288,7 @@ public abstract class BaseOptimizer implements ConvexOptimizer { @Override - public void updateGradientAccordingToParams(Gradient gradient, Model model, int batchSize, LayerWorkspaceMgr workspaceMgr) { + public void updateGradientAccordingToParams(Gradient gradient, IModel model, int batchSize, LayerWorkspaceMgr workspaceMgr) { if (model instanceof ComputationGraph) { ComputationGraph graph = (ComputationGraph) model; if (computationGraphUpdater == null) { @@ -316,7 +315,7 @@ public abstract class BaseOptimizer implements ConvexOptimizer { */ @Override public void setupSearchState(Pair pair) { - INDArray gradient = pair.getFirst().gradient(conf.variables()); + INDArray gradient = pair.getFirst().gradient(conf.netWideVariables()); INDArray params = model.params().dup(); //Need dup here: params returns an array that isn't a copy (hence changes to this are problematic for line search methods) searchState.put(GRADIENT_KEY, gradient); searchState.put(SCORE_KEY, pair.getSecond()); @@ -332,39 +331,39 @@ public abstract class BaseOptimizer implements ConvexOptimizer { } } - public static int getIterationCount(Model model) { + public static int getIterationCount(IModel model) { if (model instanceof MultiLayerNetwork) { - return ((MultiLayerNetwork) model).getLayerWiseConfigurations().getIterationCount(); + return ((MultiLayerNetwork) model).getNetConfiguration().getIterationCount(); } else if (model instanceof ComputationGraph) { return ((ComputationGraph) model).getComputationGraphConfiguration().getIterationCount(); } else { - return model.conf().getIterationCount(); + return model.getNetConfiguration().getIterationCount(); } } - public static void incrementIterationCount(Model model, int incrementBy) { + public static void incrementIterationCount(IModel model, int incrementBy) { if (model instanceof MultiLayerNetwork) { - MultiLayerConfiguration conf = ((MultiLayerNetwork) model).getLayerWiseConfigurations(); + NeuralNetConfiguration conf = ((MultiLayerNetwork) model).getNetConfiguration(); conf.setIterationCount(conf.getIterationCount() + incrementBy); } else if (model instanceof ComputationGraph) { ComputationGraphConfiguration conf = ((ComputationGraph) model).getComputationGraphConfiguration(); conf.setIterationCount(conf.getIterationCount() + incrementBy); } else { - model.conf().setIterationCount(model.conf().getIterationCount() + incrementBy); + model.getNetConfiguration().setIterationCount(model.getNetConfiguration().getIterationCount() + incrementBy); } } - public static int getEpochCount(Model model){ + public static int getEpochCount(IModel model){ if (model instanceof MultiLayerNetwork) { - return ((MultiLayerNetwork) 
model).getLayerWiseConfigurations().getEpochCount(); + return ((MultiLayerNetwork) model).getNetConfiguration().getEpochCount(); } else if (model instanceof ComputationGraph) { return ((ComputationGraph) model).getComputationGraphConfiguration().getEpochCount(); } else { - return model.conf().getEpochCount(); + return model.getNetConfiguration().getEpochCount(); } } - public static void applyConstraints(Model model){ + public static void applyConstraints(IModel model){ int iter = getIterationCount(model); int epoch = getEpochCount(model); model.applyConstraints(iter, epoch); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/ConjugateGradient.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/ConjugateGradient.java index b07ade04a..614075e20 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/ConjugateGradient.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/ConjugateGradient.java @@ -20,7 +20,7 @@ package org.deeplearning4j.optimize.solvers; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.optimize.api.StepFunction; import org.deeplearning4j.optimize.api.TrainingListener; @@ -38,7 +38,7 @@ public class ConjugateGradient extends BaseOptimizer { public ConjugateGradient(NeuralNetConfiguration conf, StepFunction stepFunction, - Collection trainingListeners, Model model) { + Collection trainingListeners, IModel model) { super(conf, stepFunction, trainingListeners, model); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java index 5760ee337..3a8fa9bdc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java @@ -20,7 +20,7 @@ package org.deeplearning4j.optimize.solvers; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.optimize.api.StepFunction; @@ -42,7 +42,7 @@ public class LBFGS extends BaseOptimizer { private final int m = 4; public LBFGS(NeuralNetConfiguration conf, StepFunction stepFunction, - Collection trainingListeners, Model model) { + Collection trainingListeners, IModel model) { super(conf, stepFunction, trainingListeners, model); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LineGradientDescent.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LineGradientDescent.java index 2afc53453..78ebf3231 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LineGradientDescent.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LineGradientDescent.java @@ -20,7 +20,7 @@ package org.deeplearning4j.optimize.solvers; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.optimize.api.StepFunction; import org.deeplearning4j.optimize.api.TrainingListener; @@ -33,7 +33,7 @@ public class LineGradientDescent extends BaseOptimizer { private static final long serialVersionUID = 6336124657542062284L; 
public LineGradientDescent(NeuralNetConfiguration conf, StepFunction stepFunction, - Collection trainingListeners, Model model) { + Collection trainingListeners, IModel model) { super(conf, stepFunction, trainingListeners, model); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java index fbee9c2a3..ee7070f01 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java @@ -21,7 +21,7 @@ package org.deeplearning4j.optimize.solvers; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -41,7 +41,7 @@ public class StochasticGradientDescent extends BaseOptimizer { public StochasticGradientDescent(NeuralNetConfiguration conf, StepFunction stepFunction, - Collection trainingListeners, Model model) { + Collection trainingListeners, IModel model) { super(conf, stepFunction, trainingListeners, model); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java index 490acc178..7684caa6a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java @@ -24,8 +24,8 @@ import lombok.Getter; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import lombok.val; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.exception.DL4JInvalidConfigException; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.optimize.api.StepFunction; import org.deeplearning4j.optimize.solvers.accumulation.encoding.ResidualPostProcessor; import org.deeplearning4j.optimize.solvers.accumulation.encoding.ThresholdAlgorithm; @@ -171,7 +171,7 @@ public class EncodedGradientsAccumulator implements GradientsAccumulator, Regist } - public static long getOptimalBufferSize(Model model, int numWorkers, int queueSize) { + public static long getOptimalBufferSize(IModel model, int numWorkers, int queueSize) { return getOptimalBufferSize(model.params().length(), numWorkers, queueSize); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java index 53bed93a2..cd3bd3f2c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java @@ -27,7 +27,6 @@ import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.layers.*; -import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D; import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn; import org.nd4j.common.base.Preconditions; import 
org.nd4j.linalg.api.ndarray.INDArray; @@ -62,7 +61,7 @@ public class Convolution1DUtils { * @return true if the input layer has an rnn format * false otherwise */ - public static boolean hasRnnDataFormat(Layer layer) { + public static boolean hasRnnDataFormat(LayerConfiguration layer) { return layer instanceof Convolution1D || layer instanceof Convolution1DLayer || layer instanceof Subsampling1DLayer || @@ -78,7 +77,7 @@ public class Convolution1DUtils { * @param layer the layer to get the format for * @return the format for the layer */ - public static RNNFormat getRnnFormatFromLayer(Layer layer) { + public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) { Preconditions.checkState(hasRnnDataFormat(layer),"ILayer of type " + layer.getClass().getName() + " and name " + layer.getLayerName() + " does not have an RNNFormat"); if(layer instanceof SimpleRnn) { SimpleRnn simpleRnn = (SimpleRnn) layer; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java index 616f1c620..e7adaa86a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java @@ -52,7 +52,7 @@ public class ConvolutionUtils { public static final String NCHW_NHWC_ERROR_MSG = "Note: Convolution layers can be configured for either NCHW (channels first)" + " or NHWC (channels last) format for input images and activations.\n" + "Layers can be configured using .dataFormat(CNN2DFormat.NCHW/NHWC) when constructing the layer, or for the entire net using" + - " .setInputType(InputType.convolutional(height, width, depth, CNN2DForman.NCHW/NHWC)).\n" + + " .inputType(InputType.convolutional(height, width, depth, CNN2DForman.NCHW/NHWC)).\n" + "ImageRecordReader and NativeImageLoader can also be configured to load image data in either NCHW or NHWC format which must match the network"; @@ -176,7 +176,7 @@ public class ConvolutionUtils { * @param layer the layer to check * @return true if the layer is one of the above types, false otherwise */ - public static boolean layerHasConvolutionLayout(Layer layer) { + public static boolean layerHasConvolutionLayout(LayerConfiguration layer) { return layer instanceof ConvolutionLayer || layer instanceof SubsamplingLayer || layer instanceof SpaceToBatchLayer || @@ -191,15 +191,15 @@ public class ConvolutionUtils { /** * Get the format for a given layer. 
- * {@link #layerHasConvolutionLayout(Layer)} - * should return true on the given {@link Layer} + * {@link #layerHasConvolutionLayout(LayerConfiguration)} + * should return true on the given {@link LayerConfiguration} * type or an {@link IllegalArgumentException} * will be thrown * @param layer the input layer * @return the {@link CNN2DFormat} for the given * layer */ - public static CNN2DFormat getFormatForLayer(Layer layer) { + public static CNN2DFormat getFormatForLayer(LayerConfiguration layer) { if(layer instanceof Convolution1DLayer) { Convolution1DLayer convolution1DLayer = (Convolution1DLayer) layer; return convolution1DLayer.getCnn2dDataFormat(); @@ -520,9 +520,9 @@ public class ConvolutionUtils { * @param conf the configuration to get height and width from * @return the configuration to get height and width from */ - public static int[] getHeightAndWidth(NeuralNetConfiguration conf) { + public static int[] getHeightAndWidth(LayerConfiguration conf) { return getHeightAndWidth( - ((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer()).getKernelSize()); + ((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getKernelSize()); } @@ -531,8 +531,8 @@ public class ConvolutionUtils { * the number of kernels from * @return the number of kernels/filters to apply */ - public static long numFeatureMap(NeuralNetConfiguration conf) { - return ((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer()).getNOut(); + public static long numFeatureMap(LayerConfiguration conf) { + return ((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getNOut(); } /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java index 5227ad77f..56f7d3b7f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java @@ -41,12 +41,12 @@ import java.util.Set; import lombok.Getter; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.exception.ExceptionUtils; import org.bytedeco.javacpp.Pointer; import org.deeplearning4j.common.config.DL4JSystemProperties; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.BackpropType; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -136,7 +136,7 @@ public class CrashReportingUtil { * @param net Net to generate the crash dump for. May not be null * @param e Throwable/exception. Stack trace will be included in the network output */ - public static void writeMemoryCrashDump(@NonNull Model net, @NonNull Throwable e){ + public static void writeMemoryCrashDump(@NonNull IModel net, @NonNull Throwable e){ if(!crashDumpsEnabled){ return; } @@ -189,7 +189,7 @@ public class CrashReportingUtil { * @param net Net to generate the report for * @return Report as a String */ - public static String generateMemoryStatus(Model net, int minibatch, InputType... inputTypes){ + public static String generateMemoryStatus(IModel net, int minibatch, InputType... 
inputTypes){ MultiLayerNetwork mln = null; ComputationGraph cg = null; boolean isMLN; @@ -310,12 +310,12 @@ public class CrashReportingUtil { //Workspaces, backprop type, layer info, activation info, helper info if(isMLN) { - sb.append(f("Backprop Type", mln.getLayerWiseConfigurations().getBackpropType())); - if(mln.getLayerWiseConfigurations().getBackpropType() == BackpropType.TruncatedBPTT){ - sb.append(f("TBPTT Length", mln.getLayerWiseConfigurations().getTbpttFwdLength() + "/" + mln.getLayerWiseConfigurations().getTbpttBackLength())); + sb.append(f("Backprop Type", mln.getNetConfiguration().getBackpropType())); + if(mln.getNetConfiguration().getBackpropType() == BackpropType.TruncatedBPTT){ + sb.append(f("TBPTT Length", mln.getNetConfiguration().getTbpttFwdLength() + "/" + mln.getNetConfiguration().getTbpttBackLength())); } - sb.append(f("Workspace Mode: Training", mln.getLayerWiseConfigurations().getTrainingWorkspaceMode())); - sb.append(f("Workspace Mode: Inference", mln.getLayerWiseConfigurations().getInferenceWorkspaceMode())); + sb.append(f("Workspace Mode: Training", mln.getNetConfiguration().getTrainingWorkspaceMode())); + sb.append(f("Workspace Mode: Inference", mln.getNetConfiguration().getInferenceWorkspaceMode())); appendLayerInformation(sb, mln.getLayers(), bytesPerElement); appendHelperInformation(sb, mln.getLayers()); appendActivationShapes(mln, (inputTypes == null || inputTypes.length == 0 ? null : inputTypes[0]), minibatch, sb, bytesPerElement); @@ -470,7 +470,7 @@ public class CrashReportingUtil { sb.append(String.format(format, "Idx", "Name", "ILayer Type", "ILayer # Parameters", "ILayer Parameter Memory")).append("\n"); for(Layer layer : layers){ long numParams = layer.numParams(); - sb.append(String.format(format, layer.getIndex(), layer.conf().getLayer().getLayerName(), + sb.append(String.format(format, layer.getIndex(), layer.getLayerConfiguration().getLayerName(), layer.getClass().getSimpleName(), numParams, fBytes(numParams * bytesPerElement))).append("\n"); } @@ -503,7 +503,7 @@ public class CrashReportingUtil { } int idx = l.getIndex(); - String layerName = l.conf().getLayer().getLayerName(); + String layerName = l.getLayerConfiguration().getLayerName(); if(layerName == null) layerName = String.valueOf(idx); @@ -549,7 +549,7 @@ public class CrashReportingUtil { sb.append(f("Current Minibatch Size", minibatch)); sb.append(f("Input Shape", Arrays.toString(inputShape))); - List inputTypes = net.getLayerWiseConfigurations().getLayerActivationTypes(inputType); + List inputTypes = net.getNetConfiguration().getLayerActivationTypes(inputType); String format = "%-3s %-20s %-20s %-42s %-20s %-12s %-12s"; sb.append(String.format(format, "Idx", "Name", "ILayer Type", "Activations Type", "Activations Shape", "# Elements", "Memory")).append("\n"); @@ -567,7 +567,7 @@ public class CrashReportingUtil { bytes = 0; } totalActivationBytes += bytes; - sb.append(String.format(format, i, layers[i].conf().getLayer().getLayerName(), layers[i].getClass().getSimpleName(), + sb.append(String.format(format, i, layers[i].getLayerConfiguration().getLayerName(), layers[i].getClass().getSimpleName(), inputTypes.get(i), Arrays.toString(shape), (numElements < 0 ? 
"" : String.valueOf(numElements)), fBytes(bytes))).append("\n"); last = bytes; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/DL4JModelValidator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/DL4JModelValidator.java index 6413a5eb4..739d3482c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/DL4JModelValidator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/DL4JModelValidator.java @@ -22,9 +22,9 @@ package org.deeplearning4j.util; import lombok.NonNull; import org.apache.commons.io.IOUtils; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.common.validation.Nd4jCommonValidator; @@ -47,7 +47,7 @@ public class DL4JModelValidator { /** * Validate whether the file represents a valid MultiLayerNetwork saved previously with {@link MultiLayerNetwork#save(File)} - * or {@link ModelSerializer#writeModel(Model, File, boolean)}, to be read with {@link MultiLayerNetwork#load(File, boolean)} + * or {@link ModelSerializer#writeModel(IModel, File, boolean)}, to be read with {@link MultiLayerNetwork#load(File, boolean)} * * @param f File that should represent an saved MultiLayerNetwork * @return Result of validation @@ -80,14 +80,14 @@ public class DL4JModelValidator { } try{ - MultiLayerConfiguration.fromJson(config); + NeuralNetConfiguration.fromJson(config); } catch (Throwable t){ return ValidationResult.builder() .formatType("MultiLayerNetwork") .formatClass(MultiLayerNetwork.class) .valid(false) .path(Nd4jCommonValidator.getPath(f)) - .issues(Collections.singletonList("Zip file JSON model configuration does not appear to represent a valid MultiLayerConfiguration")) + .issues(Collections.singletonList("Zip file JSON model configuration does not appear to represent a valid NeuralNetConfiguration")) .exception(t) .build(); } @@ -104,7 +104,7 @@ public class DL4JModelValidator { /** * Validate whether the file represents a valid ComputationGraph saved previously with {@link ComputationGraph#save(File)} - * or {@link ModelSerializer#writeModel(Model, File, boolean)}, to be read with {@link ComputationGraph#load(File, boolean)} + * or {@link ModelSerializer#writeModel(IModel, File, boolean)}, to be read with {@link ComputationGraph#load(File, boolean)} * * @param f File that should represent an saved MultiLayerNetwork * @return Result of validation diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java index e636334fd..e763d30bf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.util; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.io.input.CloseShieldInputStream; import org.deeplearning4j.common.util.DL4JFileUtils; import com.google.common.io.Files; @@ -28,10 +29,9 @@ import lombok.extern.slf4j.Slf4j; import org.apache.commons.io.IOUtils; import org.apache.commons.io.output.CloseShieldOutputStream; import org.deeplearning4j.nn.api.Layer; -import 
org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.common.base.Preconditions; @@ -74,7 +74,7 @@ public class ModelSerializer { * @param saveUpdater whether to save the updater or not * @throws IOException */ - public static void writeModel(@NonNull Model model, @NonNull File file, boolean saveUpdater) throws IOException { + public static void writeModel(@NonNull IModel model, @NonNull File file, boolean saveUpdater) throws IOException { writeModel(model,file,saveUpdater,null); } @@ -88,7 +88,7 @@ public class ModelSerializer { * @param dataNormalization the normalizer to save (optional) * @throws IOException */ - public static void writeModel(@NonNull Model model, @NonNull File file, boolean saveUpdater,DataNormalization dataNormalization) throws IOException { + public static void writeModel(@NonNull IModel model, @NonNull File file, boolean saveUpdater,DataNormalization dataNormalization) throws IOException { try (BufferedOutputStream stream = new BufferedOutputStream(new FileOutputStream(file))) { writeModel(model, stream, saveUpdater,dataNormalization); } @@ -103,7 +103,7 @@ public class ModelSerializer { * or not * @throws IOException */ - public static void writeModel(@NonNull Model model, @NonNull String path, boolean saveUpdater) throws IOException { + public static void writeModel(@NonNull IModel model, @NonNull String path, boolean saveUpdater) throws IOException { try (BufferedOutputStream stream = new BufferedOutputStream(new FileOutputStream(path))) { writeModel(model, stream, saveUpdater); } @@ -116,7 +116,7 @@ public class ModelSerializer { * @param saveUpdater whether to save the updater for the model or not * @throws IOException */ - public static void writeModel(@NonNull Model model, @NonNull OutputStream stream, boolean saveUpdater) + public static void writeModel(@NonNull IModel model, @NonNull OutputStream stream, boolean saveUpdater) throws IOException { writeModel(model,stream,saveUpdater,null); } @@ -132,14 +132,14 @@ public class ModelSerializer { * @param dataNormalization the normalizer ot save (may be null) * @throws IOException */ - public static void writeModel(@NonNull Model model, @NonNull OutputStream stream, boolean saveUpdater,DataNormalization dataNormalization) + public static void writeModel(@NonNull IModel model, @NonNull OutputStream stream, boolean saveUpdater,DataNormalization dataNormalization) throws IOException { ZipOutputStream zipfile = new ZipOutputStream(new CloseShieldOutputStream(stream)); // Save configuration as JSON String json = ""; if (model instanceof MultiLayerNetwork) { - json = ((MultiLayerNetwork) model).getLayerWiseConfigurations().toJson(); + json = ((MultiLayerNetwork) model).getNetConfiguration().toJson(); } else if (model instanceof ComputationGraph) { json = ((ComputationGraph) model).getComputationGraphConfiguration().toJson(); } @@ -318,20 +318,20 @@ public class ModelSerializer { if (gotConfig && gotCoefficients) { - MultiLayerConfiguration confFromJson; + NeuralNetConfiguration confFromJson; try{ - confFromJson = MultiLayerConfiguration.fromJson(json); + confFromJson = NeuralNetConfiguration.fromJson(json); } catch (Exception e){ ComputationGraphConfiguration cg; try{ cg = 
ComputationGraphConfiguration.fromJson(json); } catch (Exception e2){ //Invalid, and not a compgraph - throw new RuntimeException("Error deserializing JSON MultiLayerConfiguration. Saved model JSON is" + - " not a valid MultiLayerConfiguration", e); + throw new RuntimeException("Error deserializing JSON NeuralNetConfiguration. Saved model JSON is" + + " not a valid NeuralNetConfiguration", e); } if(cg.getNetworkInputs() != null && cg.getVertices() != null) { - throw new RuntimeException("Error deserializing JSON MultiLayerConfiguration. Saved model appears to be " + + throw new RuntimeException("Error deserializing JSON NeuralNetConfiguration. Saved model appears to be " + "a ComputationGraph - use ModelSerializer.restoreComputationGraph instead"); } else { throw e; @@ -554,7 +554,7 @@ public class ModelSerializer { throw e; } try{ - MultiLayerConfiguration.fromJson(json); + NeuralNetConfiguration.fromJson(json); } catch (Exception e2){ //Invalid, and not a compgraph throw new RuntimeException("Error deserializing JSON ComputationGraphConfiguration. Saved model JSON is" + @@ -652,7 +652,7 @@ public class ModelSerializer { * @param model * @return */ - public static Task taskByModel(Model model) { + public static Task taskByModel(IModel model) { Task task = new Task(); try { task.setArchitectureType(Task.ArchitectureType.RECURRENT); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java index 4348be74a..900a516cd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java @@ -21,14 +21,13 @@ package org.deeplearning4j.util; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.Trainable; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.BaseLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.GraphVertex; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -61,21 +60,21 @@ public class NetworkUtils { // by definition the identical for a MLN and "single stack" computation graph. This also has to hold // for the updater state... 
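(Editorial aside, not part of the patch: a hedged sketch of the ModelSerializer entry points touched in the hunks above, which now accept any IModel and serialize the configuration as NeuralNetConfiguration JSON. The SerializationSketch class is hypothetical; the restore helper is assumed to be the existing restoreMultiLayerNetwork(File, boolean) API, which this patch does not change.)

import java.io.File;
import java.io.IOException;
import net.brutex.ai.dnn.api.IModel;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.util.ModelSerializer;

// Illustrative only: writeModel(...) takes an IModel after this change; the updater flag
// controls whether optimizer/updater state is persisted alongside parameters and config.
class SerializationSketch {
    static MultiLayerNetwork roundTrip(IModel model, File f) throws IOException {
        ModelSerializer.writeModel(model, f, true);                 // true = also persist the updater state
        return ModelSerializer.restoreMultiLayerNetwork(f, true);   // reads back the configuration JSON written above
    }
}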
- ComputationGraphConfiguration.GraphBuilder b = new NeuralNetConfiguration.Builder() - .dataType(net.getLayerWiseConfigurations().getDataType()) + ComputationGraphConfiguration.GraphBuilder b = NeuralNetConfiguration.builder() + .dataType(net.getNetConfiguration().getDataType()) .graphBuilder(); - MultiLayerConfiguration origConf = net.getLayerWiseConfigurations().clone(); + NeuralNetConfiguration origConf = net.getNetConfiguration().clone(); int layerIdx = 0; String lastLayer = "in"; b.addInputs("in"); - for (NeuralNetConfiguration c : origConf.getConfs()) { + for (NeuralNetConfiguration c : origConf.getNetConfigurations()) { String currLayer = String.valueOf(layerIdx); InputPreProcessor preproc = origConf.getInputPreProcess(layerIdx); - b.addLayer(currLayer, c.getLayer(), preproc, lastLayer); + b.addLayer(currLayer, c.getFlattenedLayerConfigurations().get(layerIdx), preproc, lastLayer); lastLayer = currLayer; layerIdx++; @@ -123,7 +122,7 @@ public class NetworkUtils { private static void setLearningRate(MultiLayerNetwork net, int layerNumber, double newLr, ISchedule newLrSchedule, boolean refreshUpdater) { - Layer l = net.getLayer(layerNumber).conf().getLayer(); + LayerConfiguration l = net.getLayer(layerNumber).getLayerConfiguration(); if (l instanceof BaseLayer) { BaseLayer bl = (BaseLayer) l; IUpdater u = bl.getIUpdater(); @@ -155,8 +154,8 @@ public class NetworkUtils { /** * Set the learning rate schedule for all layers in the network to the specified schedule. * This schedule will replace any/all existing schedules, and also any fixed learning rate values.
- * Note that the iteration/epoch counts will not be reset. Use {@link MultiLayerConfiguration#setIterationCount(int)} - * and {@link MultiLayerConfiguration#setEpochCount(int)} if this is required + * Note that the iteration/epoch counts will not be reset. Use {@link NeuralNetConfiguration#setIterationCount(int)} + * and {@link NeuralNetConfiguration#setEpochCount(int)} if this is required * * @param newLrSchedule New learning rate schedule for all layers */ @@ -184,8 +183,8 @@ public class NetworkUtils { * Note also that {@link #setLearningRate(MultiLayerNetwork, ISchedule)} should also be used in preference, when all layers need * to be set to a new LR schedule.
* This schedule will replace any/all existing schedules, and also any fixed learning rate values.
- * Note also that the iteration/epoch counts will not be reset. Use {@link MultiLayerConfiguration#setIterationCount(int)} - * and {@link MultiLayerConfiguration#setEpochCount(int)} if this is required + * Note also that the iteration/epoch counts will not be reset. Use {@link NeuralNetConfiguration#setIterationCount(int)} + * and {@link NeuralNetConfiguration#setEpochCount(int)} if this is required * * @param layerNumber Number of the layer to set the LR schedule for * @param lrSchedule New learning rate for a single layer @@ -203,7 +202,7 @@ public class NetworkUtils { * @return Learning rate for the specified layer, or null */ public static Double getLearningRate(MultiLayerNetwork net, int layerNumber) { - Layer l = net.getLayer(layerNumber).conf().getLayer(); + LayerConfiguration l = net.getLayer(layerNumber).getLayerConfiguration(); int iter = net.getIterationCount(); int epoch = net.getEpochCount(); if (l instanceof BaseLayer) { @@ -238,14 +237,14 @@ public class NetworkUtils { private static void setLearningRate(ComputationGraph net, double newLr, ISchedule lrSchedule) { org.deeplearning4j.nn.api.Layer[] layers = net.getLayers(); for (int i = 0; i < layers.length; i++) { - setLearningRate(net, layers[i].conf().getLayer().getLayerName(), newLr, lrSchedule, false); + setLearningRate(net, layers[i].getLayerConfiguration().getLayerName(), newLr, lrSchedule, false); } refreshUpdater(net); } private static void setLearningRate(ComputationGraph net, String layerName, double newLr, ISchedule newLrSchedule, boolean refreshUpdater) { - Layer l = net.getLayer(layerName).conf().getLayer(); + LayerConfiguration l = net.getLayer(layerName).getLayerConfiguration(); if (l instanceof BaseLayer) { BaseLayer bl = (BaseLayer) l; IUpdater u = bl.getIUpdater(); @@ -325,7 +324,7 @@ public class NetworkUtils { * @return Learning rate for the specified layer, or null */ public static Double getLearningRate(ComputationGraph net, String layerName) { - Layer l = net.getLayer(layerName).conf().getLayer(); + LayerConfiguration l = net.getLayer(layerName).getLayerConfiguration(); int iter = net.getComputationGraphConfiguration().getIterationCount(); int epoch = net.getComputationGraphConfiguration().getEpochCount(); if (l instanceof BaseLayer) { @@ -353,7 +352,7 @@ public class NetworkUtils { * @see org.deeplearning4j.nn.graph.ComputationGraph#outputSingle(INDArray...) 
* @see org.deeplearning4j.nn.multilayer.MultiLayerNetwork#output(INDArray) */ - public static INDArray output(Model model, INDArray input) { + public static INDArray output(IModel model, INDArray input) { if (model instanceof MultiLayerNetwork) { final MultiLayerNetwork multiLayerNetwork = (MultiLayerNetwork) model; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java index fb3d9ea64..76f06b556 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java @@ -58,7 +58,7 @@ public class OutputLayerUtil { OUTSIDE_ZERO_ONE_RANGE.add(ActivationThresholdedReLU.class); } - private static final String COMMON_MSG = "\nThis configuration validation check can be disabled for MultiLayerConfiguration" + + private static final String COMMON_MSG = "\nThis configuration validation check can be disabled for NeuralNetConfiguration" + " and ComputationGraphConfiguration using validateOutputLayerConfig(false), however this is not recommended."; @@ -70,7 +70,7 @@ public class OutputLayerUtil { * @param layerName Name of the layer * @param layer ILayer */ - public static void validateOutputLayer(String layerName, Layer layer){ + public static void validateOutputLayer(String layerName, LayerConfiguration layer){ IActivation activation; ILossFunction loss; long nOut; @@ -166,7 +166,7 @@ public class OutputLayerUtil { * @param outputLayer Output layer * @param classifierEval Class for the classifier evaluation */ - public static void validateOutputLayerForClassifierEvaluation(Layer outputLayer, Class classifierEval){ + public static void validateOutputLayerForClassifierEvaluation(LayerConfiguration outputLayer, Class classifierEval){ if(outputLayer instanceof Yolo2OutputLayer){ throw new IllegalStateException("Classifier evaluation using " + classifierEval.getSimpleName() + " class cannot be applied for object" + " detection evaluation using Yolo2OutputLayer: " + classifierEval.getSimpleName() + " class is for classifier evaluation only."); @@ -182,7 +182,7 @@ public class OutputLayerUtil { throw new IllegalStateException("Classifier evaluation using " + classifierEval.getSimpleName() + " class cannot be applied to output" + " layers with activation functions that are not probabilities (in range 0 to 1). Output layer type: " + outputLayer.getClass().getSimpleName() + " has activation function " + bl.getActivationFn().getClass().getSimpleName() + - ". This check can be disabled using MultiLayerNetwork.getLayerWiseConfigurations().setValidateOutputLayerConfig(false)" + + ". 
This check can be disabled using MultiLayerNetwork.getConfiguration().setValidateOutputLayerConfig(false)" + " or ComputationGraph.getConfiguration().setValidateOutputLayerConfig(false)"); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java index eb5814b49..4723211b9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java @@ -23,7 +23,7 @@ package org.deeplearning4j.util; import lombok.val; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.layers.BaseRecurrentLayer; -import org.deeplearning4j.nn.conf.layers.Layer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; import org.deeplearning4j.nn.conf.layers.recurrent.TimeDistributed; @@ -442,7 +442,7 @@ public class TimeSeriesUtils { * LastTimeStep, etc * @param layer ILayer to get the RNNFormat from */ - public static RNNFormat getFormatFromRnnLayer(Layer layer){ + public static RNNFormat getFormatFromRnnLayer(LayerConfiguration layer){ if(layer instanceof BaseRecurrentLayer){ return ((BaseRecurrentLayer) layer).getRnnDataFormat(); } else if(layer instanceof MaskZeroLayer){ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties b/cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties new file mode 100644 index 000000000..93090cbc4 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties @@ -0,0 +1,22 @@ +# +# +# ****************************************************************************** +# * +# * This program and the accompanying materials are made available under the +# * terms of the Apache License, Version 2.0 which is available at +# * https://www.apache.org/licenses/LICENSE-2.0. +# * +# * See the NOTICE file distributed with this work for additional +# * information regarding copyright ownership. +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# * License for the specific language governing permissions and limitations +# * under the License. 
+# * +# * SPDX-License-Identifier: Apache-2.0 +# ***************************************************************************** +# +# + +org.slf4j.simpleLogger.defaultLogLevel = trace \ No newline at end of file diff --git a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java index 06c322a57..9ca79badc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java +++ b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java @@ -21,18 +21,17 @@ package net.brutex.ai.dnn.api; -import static net.brutex.ai.dnn.api.dnn.*; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Iterator; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.lang3.RandomUtils; import org.deeplearning4j.datasets.iterator.FloatsDataSetIterator; -import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.Updater; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ActivationLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.weights.WeightInitXavier; import org.junit.jupiter.api.Test; import org.nd4j.common.primitives.Pair; @@ -53,8 +52,10 @@ class dnnTest { assertTrue(iterator.hasNext()); + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().build(); + /** - * MultiLayerConfiguration confxx = new NeuralNetConfiguration.Builder() + * NeuralNetConfiguration confxx = NeuralNetConfiguration.builder() * .seed(42) * .updater(UPDATER) * .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) @@ -62,7 +63,7 @@ class dnnTest { * .weightInit(WeightInit.XAVIER) * .activation(Activation.IDENTITY) * .list(genLayers()) - * .setInputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) + * .inputType(InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) * // .inputPreProcessor("CNN1", new FeedForwardToCnnPreProcessor(Y_DIM, X_DIM, CHANNELS)) * .build(); */ @@ -76,20 +77,18 @@ class dnnTest { * new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), * new DenseLayer.Builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM*CHANNELS).activation(Activation.TANH) */ - dnn.conf() + NN.net() .seed(42) .updater( Adam.builder().learningRate(0.0002).beta1(0.5).build() ) .gradientNormalization( GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold( 100 ) - .weightInit( new WeightInitXavier() ) - .activation( new ActivationIdentity() ) + .weightInitFn( new WeightInitXavier() ) + .activationFn( new ActivationIdentity() ) .inputType( InputType.convolutional( 28, 28, 1)) - .layer( dnn.DenseLayer(10,30).build() ) + .layer( new DenseLayer.Builder().nIn(10).nOut(20).build() ) .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build() ) - ; - } protected static Iterable> floatIterable(final int totalRows, final int numColumns) { diff --git a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/conf/layer/FFLayerTest.java b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/conf/layer/FFLayerTest.java index 2fa944000..8430ec35d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/conf/layer/FFLayerTest.java +++ b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/conf/layer/FFLayerTest.java @@ -21,23 +21,12 @@ package net.brutex.ai.dnn.conf.layer; 
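(Editorial aside, not part of the patch: a self-contained sketch of the configuration pattern the updated tests in this change set use, with NeuralNetConfiguration.builder() replacing new NeuralNetConfiguration.Builder(), .inputType(...) replacing .setInputType(...), and the built NeuralNetConfiguration feeding MultiLayerNetwork directly where MultiLayerConfiguration was used before. Layer sizes and the ConfigMigrationSketch name are illustrative only.)

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions;

// Illustrative only; mirrors the builder usage in the updated tests of this patch.
class ConfigMigrationSketch {
    static MultiLayerNetwork build() {
        NeuralNetConfiguration conf = NeuralNetConfiguration.builder()      // was: new NeuralNetConfiguration.Builder()
                .seed(12345)
                .weightInit(WeightInit.XAVIER)
                .list()
                .layer(0, new DenseLayer.Builder().activation(Activation.RELU).nOut(100).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                        .nOut(10).activation(Activation.SOFTMAX).build())
                .inputType(InputType.convolutionalFlat(28, 28, 1))          // was: .setInputType(...)
                .build();                                                   // now yields a NeuralNetConfiguration
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        return net;
    }
}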
-import net.brutex.ai.dnn.api.IModel; -import net.brutex.ai.dnn.api.INeuralNetworkConfiguration; -import net.brutex.ai.dnn.api.ILayerConfiguration; import org.junit.jupiter.api.Test; class FFLayerTest { @Test void instantiate() { - ILayerConfiguration ff_conf = FeedForwardLayerConfiguration.builder().build(); - INeuralNetworkConfiguration net_conf = net.brutex.ai.dnn.conf.NeuralNetworkConfiguration.builder() - .layerConfiguration(ff_conf) - .build(); - IModel network = net.brutex.ai.dnn.impl.network.NeuralNetwork.builder().name("Test Network") - .configuration(net_conf) - .build(); - ff_conf.instantiate(network); } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainer.java b/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainer.java index ce14ae0b6..7af10085b 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainer.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainer.java @@ -25,7 +25,7 @@ import lombok.Builder; import lombok.NoArgsConstructor; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.parallelism.ParallelWrapper; @@ -82,23 +82,23 @@ public class ParameterServerTrainer extends DefaultTrainer { } @Override - public Model getModel() { + public IModel getModel() { return super.getModel(); } @Override - public void updateModel(@NonNull Model model) { + public void updateModel(@NonNull IModel model) { super.updateModel(model); } public static class ParameterServerTrainerBuilder extends DefaultTrainerBuilder { @Override - public ParameterServerTrainerBuilder originalModel(Model originalModel) { + public ParameterServerTrainerBuilder originalModel(IModel originalModel) { return (ParameterServerTrainerBuilder) super.originalModel(originalModel); } @Override - public ParameterServerTrainerBuilder replicatedModel(Model replicatedModel) { + public ParameterServerTrainerBuilder replicatedModel(IModel replicatedModel) { return (ParameterServerTrainerBuilder) super.replicatedModel(replicatedModel); } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainerContext.java b/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainerContext.java index 47d04d303..89f8d71a9 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainerContext.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/main/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerTrainerContext.java @@ -21,7 +21,7 @@ package org.deeplearning4j.parallelism.parameterserver; import io.aeron.driver.MediaDriver; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.parallelism.ParallelWrapper; @@ -47,7 +47,7 @@ public class 
ParameterServerTrainerContext implements TrainerContext { * @param args the arguments to initialize with (maybe null) */ @Override - public void init(Model model, Object... args) { + public void init(IModel model, Object... args) { mediaDriverContext = new MediaDriver.Context(); mediaDriver = MediaDriver.launchEmbedded(mediaDriverContext); parameterServerNode = new ParameterServerNode(mediaDriver, statusServerPort, numWorkers); @@ -73,7 +73,7 @@ public class ParameterServerTrainerContext implements TrainerContext { * @return the created training instance */ @Override - public Trainer create(String uuid, int threadId, Model model, int rootDevice, boolean useMDS, ParallelWrapper wrapper, + public Trainer create(String uuid, int threadId, IModel model, int rootDevice, boolean useMDS, ParallelWrapper wrapper, WorkspaceMode mode, int averagingFrequency) { return ParameterServerTrainer.builder().originalModel(model).parameterServerClient(ParameterServerClient .builder().aeron(parameterServerNode.getAeron()) @@ -86,12 +86,12 @@ public class ParameterServerTrainerContext implements TrainerContext { } @Override - public void finalizeRound(Model originalModel, Model... models) { + public void finalizeRound(IModel originalModel, IModel... models) { // no-op } @Override - public void finalizeTraining(Model originalModel, Model... models) { + public void finalizeTraining(IModel originalModel, IModel... models) { // no-op } } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java b/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java index d92cdf753..cfeaf0821 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java @@ -23,8 +23,8 @@ package org.deeplearning4j.parallelism.parameterserver; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -56,7 +56,7 @@ public class ParameterServerParallelWrapperTest extends BaseDL4JTest { DataSetIterator mnistTest = new MnistDataSetIterator(batchSize, false, 12345); log.info("Build model...."); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .l2(0.0005) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)).list() @@ -73,9 +73,9 @@ public class ParameterServerParallelWrapperTest extends BaseDL4JTest { .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)); + 
.inputType(InputType.convolutionalFlat(28, 28, 1)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/EarlyStoppingParallelTrainer.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/EarlyStoppingParallelTrainer.java index e1f8b9273..a7b4a98bc 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/EarlyStoppingParallelTrainer.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/EarlyStoppingParallelTrainer.java @@ -22,6 +22,7 @@ package org.deeplearning4j.parallelism; import com.google.common.util.concurrent.AtomicDouble; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.EarlyStoppingConfiguration; import org.deeplearning4j.earlystopping.EarlyStoppingResult; import org.deeplearning4j.earlystopping.listener.EarlyStoppingListener; @@ -29,7 +30,6 @@ import org.deeplearning4j.earlystopping.scorecalc.ScoreCalculator; import org.deeplearning4j.earlystopping.termination.EpochTerminationCondition; import org.deeplearning4j.earlystopping.termination.IterationTerminationCondition; import org.deeplearning4j.earlystopping.trainer.IEarlyStoppingTrainer; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.api.BaseTrainingListener; @@ -45,7 +45,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @Slf4j -public class EarlyStoppingParallelTrainer implements IEarlyStoppingTrainer { +public class EarlyStoppingParallelTrainer implements IEarlyStoppingTrainer { protected T model; @@ -314,7 +314,7 @@ public class EarlyStoppingParallelTrainer implements IEarlyStop * with each averaging step, and thus averaging is considered analogous to an iteration. 
* @param */ - private class AveragingTrainingListener extends BaseTrainingListener { + private class AveragingTrainingListener extends BaseTrainingListener { private final Logger log = LoggerFactory.getLogger(AveragingTrainingListener.class); private final IterationTerminationCondition terminationReason = null; private final EarlyStoppingParallelTrainer trainer; @@ -325,7 +325,7 @@ public class EarlyStoppingParallelTrainer implements IEarlyStop } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { //Check per-iteration termination conditions double latestScore = model.score(); trainer.setLatestScore(latestScore); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java index 9f32446ae..33009e994 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/InplaceParallelInference.java @@ -23,10 +23,10 @@ package org.deeplearning4j.parallelism; import lombok.*; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.ModelAdapter; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.parallelism.inference.LoadBalanceMode; @@ -69,14 +69,14 @@ public class InplaceParallelInference extends ParallelInference { } @Override - public synchronized void updateModel(@NonNull Model model) { + public synchronized void updateModel(@NonNull IModel model) { for (val h:holders) h.updateModel(model); } @Override - protected synchronized Model[] getCurrentModelsFromWorkers() { - val models = new Model[holders.size()]; + protected synchronized IModel[] getCurrentModelsFromWorkers() { + val models = new IModel[holders.size()]; int cnt = 0; for (val h:holders) { models[cnt++] = h.sourceModel; @@ -101,7 +101,7 @@ public class InplaceParallelInference extends ParallelInference { */ public T output(@NonNull ModelAdapter adapter, INDArray[] input, INDArray[] inputMasks, INDArray[] labelsMasks) { val holder = selector.getModelForThisThread(); - Model model = null; + IModel model = null; boolean acquired = false; try { model = holder.acquireModel(); @@ -158,9 +158,9 @@ public class InplaceParallelInference extends ParallelInference { @AllArgsConstructor @lombok.Builder protected static class ModelHolder { - protected Model sourceModel; + protected IModel sourceModel; @lombok.Builder.Default protected int workers = 4; - @lombok.Builder.Default protected List replicas = new ArrayList<>(); + @lombok.Builder.Default protected List replicas = new ArrayList<>(); @lombok.Builder.Default protected boolean rootDevice = true; @lombok.Builder.Default protected LoadBalanceMode loadBalanceMode = LoadBalanceMode.ROUND_ROBIN; protected int targetDeviceId; @@ -169,7 +169,7 @@ public class InplaceParallelInference extends ParallelInference { protected final ReentrantReadWriteLock modelLock = new ReentrantReadWriteLock(); // this queue is used in FIFO mode - 
protected final BlockingQueue queue = new LinkedBlockingQueue<>(); + protected final BlockingQueue queue = new LinkedBlockingQueue<>(); @lombok.Builder.Default protected transient boolean isCG = false; @lombok.Builder.Default protected transient boolean isMLN = false; @@ -204,7 +204,7 @@ public class InplaceParallelInference extends ParallelInference { if (loadBalanceMode == LoadBalanceMode.FIFO) queue.add(model); } else if (sourceModel instanceof MultiLayerNetwork) { - val model = new MultiLayerNetwork(MultiLayerConfiguration.fromJson(((MultiLayerNetwork) sourceModel).getLayerWiseConfigurations().toJson())); + val model = new MultiLayerNetwork(NeuralNetConfiguration.fromJson(((MultiLayerNetwork) sourceModel).getConfiguration().toJson())); model.init(params, false); Nd4j.getExecutioner().commit(); @@ -217,7 +217,7 @@ public class InplaceParallelInference extends ParallelInference { } - protected Model acquireModel() throws InterruptedException { + protected IModel acquireModel() throws InterruptedException { try { modelLock.readLock().lock(); @@ -235,7 +235,7 @@ public class InplaceParallelInference extends ParallelInference { } } - protected void releaseModel(Model model) { + protected void releaseModel(IModel model) { try { modelLock.readLock().lock(); @@ -290,7 +290,7 @@ public class InplaceParallelInference extends ParallelInference { } } - protected void updateModel(@NonNull Model model) { + protected void updateModel(@NonNull IModel model) { try { modelLock.writeLock().lock(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java index 8547e7b9f..ea2e02ad7 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java @@ -23,10 +23,10 @@ package org.deeplearning4j.parallelism; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import lombok.val; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.ModelAdapter; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.parallelism.inference.InferenceMode; @@ -52,7 +52,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; @Slf4j public class ParallelInference { - protected Model model; + protected IModel model; protected long nanos; protected int workers; protected int batchLimit; @@ -86,7 +86,7 @@ public class ParallelInference { * * @param model */ - public void updateModel(@NonNull Model model) { + public void updateModel(@NonNull IModel model) { if (zoo != null) { for (val w: zoo) w.updateModel(model); @@ -102,11 +102,11 @@ public class ParallelInference { * * @return */ - protected Model[] getCurrentModelsFromWorkers() { + protected IModel[] getCurrentModelsFromWorkers() { if (zoo == null) - return new Model[0]; + return new IModel[0]; - val models = new Model[zoo.length]; + val models = new IModel[zoo.length]; int cnt = 0; for (val w:zoo) { models[cnt++] = w.replicatedModel; @@ -284,14 +284,14 @@ public class ParallelInference { public static class Builder { - private 
final Model model; + private final IModel model; private int workers = DEFAULT_NUM_WORKERS; private int batchLimit = DEFAULT_BATCH_LIMIT; private InferenceMode inferenceMode = DEFAULT_INFERENCE_MODE; private int queueLimit = DEFAULT_QUEUE_LIMIT; protected LoadBalanceMode loadBalanceMode = LoadBalanceMode.FIFO; - public Builder(@NonNull Model model) { + public Builder(@NonNull IModel model) { this.model = model; } @@ -416,15 +416,15 @@ public class ParallelInference { private final BlockingQueue inputQueue; private final AtomicBoolean shouldWork = new AtomicBoolean(true); private final AtomicBoolean isStopped = new AtomicBoolean(false); - private Model protoModel; - private Model replicatedModel; + private IModel protoModel; + private IModel replicatedModel; private final AtomicLong counter = new AtomicLong(0); private final boolean rootDevice; private final int deviceId; private final ReentrantReadWriteLock modelLock = new ReentrantReadWriteLock(); - private InferenceWorker(int id, @NonNull Model model, @NonNull BlockingQueue inputQueue, boolean rootDevice, int deviceId) { + private InferenceWorker(int id, @NonNull IModel model, @NonNull BlockingQueue inputQueue, boolean rootDevice, int deviceId) { this.inputQueue = inputQueue; this.protoModel = model; this.rootDevice = rootDevice; @@ -439,7 +439,7 @@ public class ParallelInference { return counter.get(); } - protected void updateModel(@NonNull Model model) { + protected void updateModel(@NonNull IModel model) { try { modelLock.writeLock().lock(); this.protoModel = model; @@ -471,8 +471,8 @@ public class ParallelInference { } } else if (protoModel instanceof MultiLayerNetwork) { if (!rootDevice) { - this.replicatedModel = new MultiLayerNetwork(MultiLayerConfiguration.fromJson( - ((MultiLayerNetwork) protoModel).getLayerWiseConfigurations().toJson())); + this.replicatedModel = new MultiLayerNetwork(NeuralNetConfiguration.fromJson( + ((MultiLayerNetwork) protoModel).getConfiguration().toJson())); this.replicatedModel.init(); synchronized (locker) { diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java index 8da3b5262..e2a621508 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java @@ -22,6 +22,7 @@ package org.deeplearning4j.parallelism; import lombok.*; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.core.storage.StatsStorageRouter; import org.deeplearning4j.core.storage.listener.RoutingIterationListener; import org.deeplearning4j.optimize.solvers.accumulation.EncodingHandler; @@ -32,7 +33,6 @@ import org.deeplearning4j.datasets.iterator.DummyBlockDataSetIterator; import org.deeplearning4j.datasets.iterator.DummyBlockMultiDataSetIterator; import org.deeplearning4j.datasets.iterator.callbacks.InterleavedDataSetCallback; import org.deeplearning4j.exception.DL4JInvalidConfigException; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -49,7 +49,6 @@ import org.deeplearning4j.parallelism.factory.DefaultTrainerContext; import org.deeplearning4j.parallelism.factory.SymmetricTrainerContext; import 
org.deeplearning4j.parallelism.factory.TrainerContext; import org.deeplearning4j.parallelism.trainer.Trainer; -import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; @@ -93,7 +92,7 @@ public class ParallelWrapper implements AutoCloseable { protected AtomicBoolean exceptionEncountered; protected Throwable exception; protected final String uuid = java.util.UUID.randomUUID().toString(); - protected Model model; + protected IModel model; protected int workers = 2; protected int prefetchSize = 2; protected int averagingFrequency = 1; @@ -131,7 +130,7 @@ public class ParallelWrapper implements AutoCloseable { } }; - protected ParallelWrapper(Model model, int workers, int prefetchSize) { + protected ParallelWrapper(IModel model, int workers, int prefetchSize) { this.model = model; this.workers = workers; this.prefetchSize = prefetchSize; @@ -669,7 +668,7 @@ public class ParallelWrapper implements AutoCloseable { } } - public static class Builder { + public static class Builder { protected TrainingMode trainingMode = TrainingMode.AVERAGING; protected T model; protected int workers = Nd4j.getAffinityManager().getNumberOfDevices(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContext.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContext.java index 4aea543eb..dc9fa3982 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContext.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContext.java @@ -20,7 +20,7 @@ package org.deeplearning4j.parallelism.factory; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.parallelism.ParallelWrapper; @@ -35,7 +35,7 @@ public class DefaultTrainerContext implements TrainerContext { * @param args the arguments to initialize with (maybe null) */ @Override - public void init(Model model, Object... args) { + public void init(IModel model, Object... args) { } @@ -53,7 +53,7 @@ public class DefaultTrainerContext implements TrainerContext { * @return the created training instance */ @Override - public Trainer create(String uuid, int threadId, Model model, int rootDevice, boolean useMDS, ParallelWrapper wrapper, + public Trainer create(String uuid, int threadId, IModel model, int rootDevice, boolean useMDS, ParallelWrapper wrapper, WorkspaceMode mode, int averagingFrequency) { DefaultTrainer trainer = DefaultTrainer.builder().originalModel(model).replicatedModel(model).threadId(threadId) @@ -68,14 +68,14 @@ public class DefaultTrainerContext implements TrainerContext { } @Override - public void finalizeRound(Model originalModel, Model... models) { + public void finalizeRound(IModel originalModel, IModel... models) { // apply averaging // TODO: move averaging here } @Override - public void finalizeTraining(Model originalModel, Model... models) { + public void finalizeTraining(IModel originalModel, IModel... 
models) { finalizeRound(originalModel, models); } } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContext.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContext.java index 3febe09c0..663cb148c 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContext.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContext.java @@ -21,11 +21,10 @@ package org.deeplearning4j.parallelism.factory; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.parallelism.ParallelWrapper; -import org.deeplearning4j.parallelism.trainer.DefaultTrainer; import org.deeplearning4j.parallelism.trainer.SymmetricTrainer; import org.deeplearning4j.parallelism.trainer.Trainer; @@ -38,7 +37,7 @@ public class SymmetricTrainerContext implements TrainerContext { * @param args the arguments to initialize with (maybe null) */ @Override - public void init(Model model, Object... args) { + public void init(IModel model, Object... args) { } @@ -56,7 +55,7 @@ public class SymmetricTrainerContext implements TrainerContext { * @return the created training instance */ @Override - public Trainer create(String uuid, int threadId, Model model, int rootDevice, boolean useMDS, ParallelWrapper wrapper, + public Trainer create(String uuid, int threadId, IModel model, int rootDevice, boolean useMDS, ParallelWrapper wrapper, WorkspaceMode mode, int averagingFrequency) { SymmetricTrainer trainer = new SymmetricTrainer(model, uuid, threadId, mode, wrapper, useMDS); @@ -68,12 +67,12 @@ public class SymmetricTrainerContext implements TrainerContext { } @Override - public void finalizeRound(Model originalModel, Model... models) { + public void finalizeRound(IModel originalModel, IModel... models) { // no-op } @Override - public void finalizeTraining(Model originalModel, Model... models) { + public void finalizeTraining(IModel originalModel, IModel... models) { // we CAN avarage here, but for now we'll just push first model params to original model originalModel.setParams(models[0].params()); } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/TrainerContext.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/TrainerContext.java index cc1bd53f7..57cdd76fd 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/TrainerContext.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/factory/TrainerContext.java @@ -20,7 +20,7 @@ package org.deeplearning4j.parallelism.factory; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.parallelism.ParallelWrapper; import org.deeplearning4j.parallelism.trainer.Trainer; @@ -33,7 +33,7 @@ public interface TrainerContext { * @param model * @param args the arguments to initialize with (maybe null) */ - void init(Model model, Object... args); + void init(IModel model, Object... 
args); /** * Create a {@link Trainer} @@ -47,7 +47,7 @@ public interface TrainerContext { * for coordination with the {@link ParallelWrapper} 's {@link org.deeplearning4j.optimize.api.TrainingListener} * @return the created training instance */ - Trainer create(String uuid, int threadId, Model model, int rootDevice, boolean useMDS, ParallelWrapper wrapper, + Trainer create(String uuid, int threadId, IModel model, int rootDevice, boolean useMDS, ParallelWrapper wrapper, WorkspaceMode workspaceMode, int averagingFrequency); @@ -57,7 +57,7 @@ public interface TrainerContext { * @param originalModel * @param models */ - void finalizeRound(Model originalModel, Model... models); + void finalizeRound(IModel originalModel, IModel... models); /** * This method is called @@ -65,5 +65,5 @@ public interface TrainerContext { * @param originalModel * @param models */ - void finalizeTraining(Model originalModel, Model... models); + void finalizeTraining(IModel originalModel, IModel... models); } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/main/ParallelWrapperMain.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/main/ParallelWrapperMain.java index c0f6c9785..26e76ed61 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/main/ParallelWrapperMain.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/main/ParallelWrapperMain.java @@ -25,10 +25,10 @@ import com.beust.jcommander.Parameter; import com.beust.jcommander.ParameterException; import lombok.Data; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.common.config.DL4JClassLoading; import org.deeplearning4j.core.storage.StatsStorageRouter; import org.deeplearning4j.core.storage.impl.RemoteUIStatsStorageRouter; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.parallelism.ParallelWrapper; import org.deeplearning4j.core.util.ModelGuesser; @@ -101,7 +101,7 @@ public class ParallelWrapperMain { public void run() throws Exception { - Model model = ModelGuesser.loadModelGuess(modelPath); + IModel model = ModelGuesser.loadModelGuess(modelPath); // ParallelWrapper will take care of load balancing between GPUs. wrapper = new ParallelWrapper.Builder(model) // DataSets prefetching options. 
Set this value with respect to number of actual devices diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java index be706234f..dd7cda946 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/DefaultTrainer.java @@ -22,12 +22,12 @@ package org.deeplearning4j.parallelism.trainer; import lombok.*; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.core.storage.StatsStorageRouter; import org.deeplearning4j.core.storage.listener.RoutingIterationListener; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -56,7 +56,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; @AllArgsConstructor public class DefaultTrainer extends Thread implements Trainer { - protected Model replicatedModel; + protected IModel replicatedModel; // TODO: make queue size configurable @Builder.Default @@ -89,7 +89,7 @@ public class DefaultTrainer extends Thread implements Trainer { protected WorkspaceMode workspaceMode; protected int averagingFrequency; protected int threadId; - protected Model originalModel; + protected IModel originalModel; protected final ReentrantReadWriteLock modelLock = new ReentrantReadWriteLock(); @@ -135,12 +135,12 @@ public class DefaultTrainer extends Thread implements Trainer { } @Override - public Model getModel() { + public IModel getModel() { return replicatedModel; } @Override - public void updateModel(@NonNull Model model) { + public void updateModel(@NonNull IModel model) { this.shouldUpdate.set(true); try { modelLock.writeLock().lock(); @@ -295,8 +295,8 @@ public class DefaultTrainer extends Thread implements Trainer { // however, we don't need clone or anything here if (originalModel instanceof MultiLayerNetwork) { if (!onRootModel) { - MultiLayerConfiguration conf = MultiLayerConfiguration.fromJson( - ((MultiLayerNetwork) originalModel).getLayerWiseConfigurations().toJson()); + NeuralNetConfiguration conf = NeuralNetConfiguration.fromJson( + ((MultiLayerNetwork) originalModel).getConfiguration().toJson()); conf.setTrainingWorkspaceMode(workspaceMode); this.replicatedModel = new MultiLayerNetwork(conf); @@ -323,7 +323,7 @@ public class DefaultTrainer extends Thread implements Trainer { if (!((MultiLayerNetwork) replicatedModel).isInitCalled()) this.replicatedModel.init(); - ((MultiLayerNetwork) replicatedModel).getLayerWiseConfigurations() + ((MultiLayerNetwork) replicatedModel).getConfiguration() .setTrainingWorkspaceMode(workspaceMode); } } else if (originalModel instanceof ComputationGraph) { diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/SymmetricTrainer.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/SymmetricTrainer.java index 96e02cad8..a3a3c57db 100644 --- 
a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/SymmetricTrainer.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/SymmetricTrainer.java @@ -22,7 +22,7 @@ package org.deeplearning4j.parallelism.trainer; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -34,7 +34,7 @@ import org.deeplearning4j.parallelism.ParallelWrapper; public class SymmetricTrainer extends DefaultTrainer implements CommunicativeTrainer { protected GradientsAccumulator accumulator; - public SymmetricTrainer(@NonNull Model originalModel, String uuid, int threadIdx, @NonNull WorkspaceMode mode, + public SymmetricTrainer(@NonNull IModel originalModel, String uuid, int threadIdx, @NonNull WorkspaceMode mode, @NonNull ParallelWrapper wrapper, boolean useMDS) { super(); this.uuid = uuid + "_thread_" + threadIdx; diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/Trainer.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/Trainer.java index 51e9be570..bdc773b0f 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/Trainer.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/trainer/Trainer.java @@ -21,7 +21,7 @@ package org.deeplearning4j.parallelism.trainer; import lombok.NonNull; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; @@ -54,17 +54,17 @@ public interface Trainer extends Runnable { /** * THe current model for the trainer - * @return the current {@link Model} + * @return the current {@link IModel} * for the worker */ - Model getModel(); + IModel getModel(); /** - * Update the current {@link Model} + * Update the current {@link IModel} * for the worker * @param model the new model for this worker */ - void updateModel(@NonNull Model model); + void updateModel(@NonNull IModel model); boolean isRunning(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java index ecb28ef9b..e64e6d06f 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java @@ -40,7 +40,7 @@ public class InplaceParallelInferenceTest extends BaseDL4JTest { public void testUpdateModel() { int nIn = 5; - val conf = new NeuralNetConfiguration.Builder() + val conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") @@ -68,7 +68,7 @@ public class InplaceParallelInferenceTest extends BaseDL4JTest { assertEquals(net.params(), m.params()); } - val conf2 = new NeuralNetConfiguration.Builder() + val conf2 = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("out0", new 
OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") @@ -101,7 +101,7 @@ public class InplaceParallelInferenceTest extends BaseDL4JTest { public void testOutput_RoundRobin_1() throws Exception { int nIn = 5; - val conf = new NeuralNetConfiguration.Builder() + val conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") @@ -134,7 +134,7 @@ public class InplaceParallelInferenceTest extends BaseDL4JTest { public void testOutput_FIFO_1() throws Exception { int nIn = 5; - val conf = new NeuralNetConfiguration.Builder() + val conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java index 3919bfbc7..5f1ac9a7a 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java @@ -23,12 +23,11 @@ package org.deeplearning4j.parallelism; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import lombok.val; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.exception.DL4JInvalidInputException; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -414,7 +413,7 @@ public class ParallelInferenceTest extends BaseDL4JTest { int nIn = 10; int[] tsLengths = {3,5,7,10,50,100}; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .list() @@ -459,7 +458,7 @@ public class ParallelInferenceTest extends BaseDL4JTest { int nIn = 10; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .list() @@ -527,7 +526,7 @@ public class ParallelInferenceTest extends BaseDL4JTest { {1,nIn,40,45}, }; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .list() @@ -575,7 +574,7 @@ public class ParallelInferenceTest extends BaseDL4JTest { int nIn = 3; int[] defaultShape = new int[]{1, nIn, 16, 16}; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .convolutionMode(ConvolutionMode.Same) @@ -625,7 +624,7 @@ public class ParallelInferenceTest extends BaseDL4JTest { int nIn = 10; int wrongNIn = 5; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .list() @@ -689,7 +688,7 @@ public class 
ParallelInferenceTest extends BaseDL4JTest { int nIn = 10; int tsLength = 16; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .seed(12345) .list() @@ -757,7 +756,7 @@ public class ParallelInferenceTest extends BaseDL4JTest { public void testModelUpdate_1() throws Exception { int nIn = 5; - val conf = new NeuralNetConfiguration.Builder() + val conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") @@ -782,12 +781,12 @@ public class ParallelInferenceTest extends BaseDL4JTest { assertNotEquals(0, output.length); } - Model[] modelsBefore = inf.getCurrentModelsFromWorkers(); + IModel[] modelsBefore = inf.getCurrentModelsFromWorkers(); assertEquals(4, modelsBefore.length); boolean passed = false; int cnt0 = 0; - for (Model m : modelsBefore) { + for (IModel m : modelsBefore) { // model can be null for some of the workers yet, due to race condition if (m != null) { Thread.sleep(500); @@ -799,7 +798,7 @@ public class ParallelInferenceTest extends BaseDL4JTest { assertTrue(passed); - val conf2 = new NeuralNetConfiguration.Builder() + val conf2 = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).build(), "in") @@ -830,7 +829,7 @@ public class ParallelInferenceTest extends BaseDL4JTest { int nIn = 5; - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java index 458b9dab1..b74262dd2 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java @@ -25,8 +25,8 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.EarlyTerminationDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.eval.Evaluation; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -37,8 +37,6 @@ import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.listeners.ScoreIterationListener; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; @@ -72,7 +70,7 @@ public class ParallelWrapperTest extends BaseDL4JTest { log.info("F: {}; L: {};", t0.getFeatures().shape(), t0.getLabels().shape()); log.info("Build 
model...."); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .l2(0.0005) //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) .weightInit(WeightInit.XAVIER) @@ -90,9 +88,9 @@ public class ParallelWrapperTest extends BaseDL4JTest { .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, nChannels)); + .inputType(InputType.convolutionalFlat(28, 28, nChannels)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java index eb3ccfef8..799f2dfd7 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java @@ -25,9 +25,8 @@ import org.deeplearning4j.core.storage.StatsStorage; import org.deeplearning4j.core.storage.StatsStorageRouter; import org.deeplearning4j.core.storage.listener.RoutingIterationListener; import org.deeplearning4j.datasets.iterator.ExistingDataSetIterator; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -56,11 +55,11 @@ public class TestListeners extends BaseDL4JTest { public void testListeners() { TestListener.clearCounts(); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list().layer(0, + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list().layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) .activation(Activation.TANH).build()); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); @@ -71,7 +70,7 @@ public class TestListeners extends BaseDL4JTest { public void testListenersGraph() { TestListener.clearCounts(); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder() .addInputs("in").addLayer("0", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) .activation(Activation.TANH).build(), @@ -88,11 +87,11 @@ public class TestListeners extends BaseDL4JTest { public void testListenersViaModel() { TestListener.clearCounts(); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list().layer(0, + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list().layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) 
.activation(Activation.TANH).build()); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); @@ -109,7 +108,7 @@ public class TestListeners extends BaseDL4JTest { public void testListenersViaModelGraph() { TestListener.clearCounts(); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder() .addInputs("in").addLayer("0", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) .activation(Activation.TANH).build(), @@ -128,7 +127,7 @@ public class TestListeners extends BaseDL4JTest { assertEquals(2, ss.listWorkerIDsForSession(ss.listSessionIDs().get(0)).size()); } - private static void testListenersForModel(Model model, List listeners) { + private static void testListenersForModel(IModel model, List listeners) { int nWorkers = 2; ParallelWrapper wrapper = new ParallelWrapper.Builder(model).workers(nWorkers).averagingFrequency(1) @@ -176,26 +175,26 @@ public class TestListeners extends BaseDL4JTest { } @Override - public void onEpochStart(Model model) {} + public void onEpochStart(IModel model) {} @Override - public void onEpochEnd(Model model) {} + public void onEpochEnd(IModel model) {} @Override - public void onForwardPass(Model model, List activations) { + public void onForwardPass(IModel model, List activations) { forwardPassCount.incrementAndGet(); } @Override - public void onForwardPass(Model model, Map activations) { + public void onForwardPass(IModel model, Map activations) { forwardPassCount.incrementAndGet(); } @Override - public void onGradientCalculation(Model model) {} + public void onGradientCalculation(IModel model) {} @Override - public void onBackwardPass(Model model) { + public void onBackwardPass(IModel model) { backwardPassCount.getAndIncrement(); } @@ -233,7 +232,7 @@ public class TestListeners extends BaseDL4JTest { } @Override - public void iterationDone(Model model, int iteration, int epoch) {} + public void iterationDone(IModel model, int iteration, int epoch) {} } } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java index 2eaf2e850..a3b97339b 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java @@ -32,7 +32,6 @@ import org.deeplearning4j.earlystopping.termination.MaxScoreIterationTermination import org.deeplearning4j.earlystopping.termination.MaxTimeIterationTerminationCondition; import org.deeplearning4j.earlystopping.trainer.IEarlyStoppingTrainer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -56,7 +55,7 @@ public class TestParallelEarlyStopping extends BaseDL4JTest { // be properly designed // @Test // public void testEarlyStoppingIris(){ - // MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + // NeuralNetConfiguration conf = NeuralNetConfiguration.builder() // 
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) // .updater(Updater.SGD) // .weightInit(WeightInit.XAVIER) @@ -101,7 +100,7 @@ public class TestParallelEarlyStopping extends BaseDL4JTest { @Test public void testEarlyStoppingEveryNEpoch() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) @@ -133,7 +132,7 @@ public class TestParallelEarlyStopping extends BaseDL4JTest { //Test poor tuning (high LR): should terminate on MaxScoreIterationTerminationCondition Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).list() diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java index 7bea67ef6..9d8fe7c70 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java @@ -31,7 +31,6 @@ import org.deeplearning4j.earlystopping.scorecalc.DataSetLossCalculator; import org.deeplearning4j.earlystopping.termination.MaxEpochsTerminationCondition; import org.deeplearning4j.earlystopping.trainer.IEarlyStoppingTrainer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -56,7 +55,7 @@ public class TestParallelEarlyStoppingUI extends BaseDL4JTest { public void testParallelStatsListenerCompatibility() throws Exception { UIServer uiServer = UIServer.getInstance(); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContextTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContextTest.java index 3a85b4b34..306338b11 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContextTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContextTest.java @@ -21,8 +21,8 @@ package org.deeplearning4j.parallelism.factory; import lombok.val; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.WorkspaceMode; import 
org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -33,7 +33,6 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.parallelism.ParallelWrapper; -import org.deeplearning4j.parallelism.trainer.SymmetricTrainer; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.learning.config.Nesterovs; @@ -50,11 +49,11 @@ public class DefaultTrainerContextTest extends BaseDL4JTest { @Test public void testEqualUuid1() { - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .l2(0.0005) //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) .weightInit(WeightInit.XAVIER) - .updater(new Nesterovs(0.01, 0.9)).list() + .updater(new Nesterovs(0.01, 0.9)) .layer(0, new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) @@ -68,9 +67,9 @@ public class DefaultTrainerContextTest extends BaseDL4JTest { .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, nChannels)); + .inputType(InputType.convolutionalFlat(28, 28, nChannels)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContextTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContextTest.java index ec82896df..b61f820f7 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContextTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContextTest.java @@ -21,8 +21,8 @@ package org.deeplearning4j.parallelism.factory; import lombok.val; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -50,7 +50,7 @@ public class SymmetricTrainerContextTest extends BaseDL4JTest { @Test public void testEqualUuid1() { - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .l2(0.0005) //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) .weightInit(WeightInit.XAVIER) @@ -68,9 +68,9 @@ public class SymmetricTrainerContextTest extends BaseDL4JTest { .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) .layer(5, new 
OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, nChannels)); + .inputType(InputType.convolutionalFlat(28, 28, nChannels)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java index 315788855..da27c4c63 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java @@ -22,8 +22,8 @@ package org.deeplearning4j.parallelism.main; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -66,7 +66,7 @@ public class ParallelWrapperMainTest extends BaseDL4JTest { DataSetIterator mnistTest = new MnistDataSetIterator(batchSize, false, 12345); log.info("Build model...."); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .l2(0.0005) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)).list() @@ -83,9 +83,9 @@ public class ParallelWrapperMainTest extends BaseDL4JTest { .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, nChannels)); + .inputType(InputType.convolutionalFlat(28, 28, nChannels)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); File tempModel = new File(testDir, "tmpmodel.zip"); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/api/TrainingHook.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/api/TrainingHook.java index 8ea9738db..2adec7f64 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/api/TrainingHook.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/api/TrainingHook.java @@ -20,7 +20,7 @@ package org.deeplearning4j.spark.api; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; @@ -33,7 +33,7 @@ public interface TrainingHook extends Serializable { * that was used for the update * @param model themodel that was update */ - void preUpdate(DataSet minibatch, Model model); + void preUpdate(DataSet minibatch, 
IModel model); /** * A hook method for post update @@ -41,7 +41,7 @@ public interface TrainingHook extends Serializable { * that was usd for the update * @param model the model that was updated */ - void postUpdate(DataSet minibatch, Model model); + void postUpdate(DataSet minibatch, IModel model); /** * A hook method for pre update. @@ -49,7 +49,7 @@ public interface TrainingHook extends Serializable { * that was used for the update * @param model the model that was update */ - void preUpdate(MultiDataSet minibatch, Model model); + void preUpdate(MultiDataSet minibatch, IModel model); /** * A hook method for post update @@ -57,6 +57,6 @@ public interface TrainingHook extends Serializable { * that was usd for the update * @param model the model that was updated */ - void postUpdate(MultiDataSet minibatch, Model model); + void postUpdate(MultiDataSet minibatch, IModel model); } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/api/worker/NetBroadcastTuple.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/api/worker/NetBroadcastTuple.java index 9fa317026..f0d69c039 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/api/worker/NetBroadcastTuple.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/api/worker/NetBroadcastTuple.java @@ -22,7 +22,7 @@ package org.deeplearning4j.spark.api.worker; import lombok.Data; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import java.io.Serializable; @@ -31,13 +31,13 @@ import java.util.concurrent.atomic.AtomicInteger; @Data public class NetBroadcastTuple implements Serializable { - private final MultiLayerConfiguration configuration; + private final NeuralNetConfiguration configuration; private final ComputationGraphConfiguration graphConfiguration; private final INDArray parameters; private final INDArray updaterState; private final AtomicInteger counter; - public NetBroadcastTuple(MultiLayerConfiguration configuration, INDArray parameters, INDArray updaterState) { + public NetBroadcastTuple(NeuralNetConfiguration configuration, INDArray parameters, INDArray updaterState) { this(configuration, null, parameters, updaterState); } @@ -47,12 +47,12 @@ public class NetBroadcastTuple implements Serializable { } - public NetBroadcastTuple(MultiLayerConfiguration configuration, ComputationGraphConfiguration graphConfiguration, + public NetBroadcastTuple(NeuralNetConfiguration configuration, ComputationGraphConfiguration graphConfiguration, INDArray parameters, INDArray updaterState) { this(configuration, graphConfiguration, parameters, updaterState, new AtomicInteger(0)); } - public NetBroadcastTuple(MultiLayerConfiguration configuration, ComputationGraphConfiguration graphConfiguration, + public NetBroadcastTuple(NeuralNetConfiguration configuration, ComputationGraphConfiguration graphConfiguration, INDArray parameters, INDArray updaterState, AtomicInteger counter) { this.configuration = configuration; this.graphConfiguration = graphConfiguration; diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/earlystopping/BaseSparkEarlyStoppingTrainer.java 
b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/earlystopping/BaseSparkEarlyStoppingTrainer.java index 5ed1848b7..8d799c1b2 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/earlystopping/BaseSparkEarlyStoppingTrainer.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/earlystopping/BaseSparkEarlyStoppingTrainer.java @@ -29,7 +29,7 @@ import org.deeplearning4j.earlystopping.scorecalc.ScoreCalculator; import org.deeplearning4j.earlystopping.termination.EpochTerminationCondition; import org.deeplearning4j.earlystopping.termination.IterationTerminationCondition; import org.deeplearning4j.earlystopping.trainer.IEarlyStoppingTrainer; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; import org.slf4j.Logger; @@ -39,7 +39,7 @@ import java.io.IOException; import java.util.LinkedHashMap; import java.util.Map; -public abstract class BaseSparkEarlyStoppingTrainer<T extends Model> implements IEarlyStoppingTrainer<T> { +public abstract class BaseSparkEarlyStoppingTrainer<T extends IModel> implements IEarlyStoppingTrainer<T> { private static final Logger log = LoggerFactory.getLogger(BaseSparkEarlyStoppingTrainer.class); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeReconstructionProbWithKeyFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeReconstructionProbWithKeyFunction.java index ed302a351..e1bfc277f 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeReconstructionProbWithKeyFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeReconstructionProbWithKeyFunction.java @@ -31,7 +31,7 @@ public abstract class BaseVaeReconstructionProbWithKeyFunction extends BaseVa /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param useLogProbability If true: use log probability. False: use raw probability. 
* @param batchSize Batch size to use when scoring * @param numSamples Number of samples to use when calling {@link VariationalAutoencoder#reconstructionLogProbability(INDArray, int)} diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeScoreWithKeyFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeScoreWithKeyFunction.java index 4140b8a53..cfcc93b78 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeScoreWithKeyFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/common/score/BaseVaeScoreWithKeyFunction.java @@ -45,7 +45,7 @@ public abstract class BaseVaeScoreWithKeyFunction implements PairFlatMapFunct /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param batchSize Batch size to use when scoring */ public BaseVaeScoreWithKeyFunction(Broadcast params, Broadcast jsonConfig, int batchSize) { diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/evaluation/EvaluationRunner.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/evaluation/EvaluationRunner.java index a38322234..beb8d7972 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/evaluation/EvaluationRunner.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/evaluation/EvaluationRunner.java @@ -22,12 +22,12 @@ package org.deeplearning4j.spark.impl.evaluation; import lombok.*; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.apache.spark.broadcast.Broadcast; import org.deeplearning4j.datasets.iterator.IteratorDataSetIterator; import org.deeplearning4j.datasets.iterator.IteratorMultiDataSetIterator; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.common.base.Preconditions; @@ -124,14 +124,14 @@ public class EvaluationRunner { EvaluationFuture f = new EvaluationFuture(); f.setResult(evals); try { - Model m; + IModel m; if (isCG) { ComputationGraphConfiguration conf = ComputationGraphConfiguration.fromJson(json.getValue()); ComputationGraph cg = new ComputationGraph(conf); cg.init(deviceLocalParams.get(), false); m = cg; } else { - MultiLayerConfiguration conf = MultiLayerConfiguration.fromJson(json.getValue()); + NeuralNetConfiguration conf = NeuralNetConfiguration.fromJson(json.getValue()); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(deviceLocalParams.get(), false); m = net; @@ -176,7 +176,7 @@ public class EvaluationRunner { return f; } - private static void doEval(Model m, IEvaluation[] e, Iterator ds, Iterator mds, int evalBatchSize){ + private static void doEval(IModel m, IEvaluation[] e, Iterator ds, Iterator mds, int evalBatchSize){ if(m instanceof MultiLayerNetwork){ MultiLayerNetwork mln = (MultiLayerNetwork)m; if(ds != null){ diff --git 
a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java index e460ddc2f..84c7cf753 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java @@ -632,7 +632,7 @@ public class SparkComputationGraph extends SparkListenable { * @return {@link RegressionEvaluation} instance with regression performance */ public T evaluateRegression(JavaRDD data, int minibatchSize) { - val nOut = ((FeedForwardLayer) network.getOutputLayer(0).conf().getLayer()).getNOut(); + val nOut = ((FeedForwardLayer) network.getOutputLayer(0).getLayerConfiguration()).getNOut(); return (T)doEvaluation(data, new org.deeplearning4j.eval.RegressionEvaluation(nOut), minibatchSize); } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java index b7da3d143..f6794f27d 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionErrorWithKeyFunction.java @@ -33,7 +33,7 @@ public class CGVaeReconstructionErrorWithKeyFunction extends BaseVaeScoreWith /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param batchSize Batch size to use when scoring */ public CGVaeReconstructionErrorWithKeyFunction(Broadcast params, Broadcast jsonConfig, diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java index 43defe37f..b5413e0dc 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/graph/scoring/CGVaeReconstructionProbWithKeyFunction.java @@ -33,7 +33,7 @@ public class CGVaeReconstructionProbWithKeyFunction extends BaseVaeReconstruc /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param useLogProbability If true: use log probability. False: use raw probability. 
* @param batchSize Batch size to use when scoring * @param numSamples Number of samples to use when calling {@link VariationalAutoencoder#reconstructionLogProbability(INDArray, int)} diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java index d8e1c1437..2e50414da 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java @@ -35,7 +35,7 @@ import org.datavec.spark.util.BroadcastHadoopConfigHolder; import org.deeplearning4j.core.loader.DataSetLoader; import org.deeplearning4j.core.loader.MultiDataSetLoader; import org.deeplearning4j.core.loader.impl.SerializedDataSetLoader; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.spark.api.TrainingMaster; @@ -80,7 +80,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { public static final int DEFAULT_ROC_THRESHOLD_STEPS = 32; public static final int DEFAULT_EVAL_WORKERS = 4; private final transient JavaSparkContext sc; - private final MultiLayerConfiguration conf; + private final NeuralNetConfiguration conf; private MultiLayerNetwork network; private double lastScore; private int defaultEvaluationWorkers = DEFAULT_EVAL_WORKERS; @@ -104,7 +104,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { * @param sparkContext the spark context to use * @param conf the configuration of the network */ - public SparkDl4jMultiLayer(SparkContext sparkContext, MultiLayerConfiguration conf, + public SparkDl4jMultiLayer(SparkContext sparkContext, NeuralNetConfiguration conf, TrainingMaster trainingMaster) { this(new JavaSparkContext(sparkContext), initNetwork(conf), trainingMaster); } @@ -115,14 +115,14 @@ public class SparkDl4jMultiLayer extends SparkListenable { * @param sc the spark context to use * @param conf the configuration of the network */ - public SparkDl4jMultiLayer(JavaSparkContext sc, MultiLayerConfiguration conf, TrainingMaster trainingMaster) { + public SparkDl4jMultiLayer(JavaSparkContext sc, NeuralNetConfiguration conf, TrainingMaster trainingMaster) { this(sc.sc(), conf, trainingMaster); } public SparkDl4jMultiLayer(JavaSparkContext javaSparkContext, MultiLayerNetwork network, TrainingMaster trainingMaster) { sc = javaSparkContext; - this.conf = network.getLayerWiseConfigurations().clone(); + this.conf = network.getConfiguration().clone(); this.network = network; if (!network.isInitCalled()) network.init(); @@ -132,7 +132,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { SparkUtils.checkKryoConfiguration(javaSparkContext, log); } - private static MultiLayerNetwork initNetwork(MultiLayerConfiguration conf) { + private static MultiLayerNetwork initNetwork(NeuralNetConfiguration conf) { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); return net; @@ -315,8 +315,8 @@ public class SparkDl4jMultiLayer extends SparkListenable { * @return the multi layer network that was fitDataSet */ public MultiLayerNetwork fitLabeledPoint(JavaRDD rdd) { - int nLayers = 
network.getLayerWiseConfigurations().getConfs().size(); - FeedForwardLayer ffl = (FeedForwardLayer) network.getLayerWiseConfigurations().getConf(nLayers - 1).getLayer(); + int nLayers = network.getConfiguration().getFlattenedLayerConfigurations().size(); + FeedForwardLayer ffl = (FeedForwardLayer) network.getConfiguration().getFlattenedLayerConfigurations().get(nLayers - 1); JavaRDD ds = MLLibUtil.fromLabeledPoint(sc, rdd, ffl.getNOut()); return fit(ds); } @@ -577,7 +577,7 @@ public class SparkDl4jMultiLayer extends SparkListenable { * @return {@link RegressionEvaluation} instance with regression performance */ public T evaluateRegression(JavaRDD data, int minibatchSize) { - long nOut = ((FeedForwardLayer) network.getOutputLayer().conf().getLayer()).getNOut(); + long nOut = ((FeedForwardLayer) network.getOutputLayer().getLayerConfiguration()).getNOut(); return (T)doEvaluation(data, new org.deeplearning4j.eval.RegressionEvaluation(nOut), minibatchSize); } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/FeedForwardWithKeyFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/FeedForwardWithKeyFunction.java index 510f2e4d4..c064c81d0 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/FeedForwardWithKeyFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/FeedForwardWithKeyFunction.java @@ -22,7 +22,7 @@ package org.deeplearning4j.spark.impl.multilayer.scoring; import org.apache.spark.api.java.function.PairFlatMapFunction; import org.apache.spark.broadcast.Broadcast; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.DataSetUtil; @@ -49,7 +49,7 @@ public class FeedForwardWithKeyFunction /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param batchSize Batch size to use for forward pass (use > 1 for efficiency) */ public FeedForwardWithKeyFunction(Broadcast params, Broadcast jsonConfig, int batchSize) { @@ -65,7 +65,7 @@ public class FeedForwardWithKeyFunction return Collections.emptyIterator(); } - MultiLayerNetwork network = new MultiLayerNetwork(MultiLayerConfiguration.fromJson(jsonConfig.getValue())); + MultiLayerNetwork network = new MultiLayerNetwork(NeuralNetConfiguration.fromJson(jsonConfig.getValue())); network.init(); INDArray val = params.value().unsafeDuplication(); if (val.length() != network.numParams(false)) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/ScoreExamplesFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/ScoreExamplesFunction.java index 6c3878da5..b6a21d181 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/ScoreExamplesFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/ScoreExamplesFunction.java @@ -21,9 +21,8 @@ package org.deeplearning4j.spark.impl.multilayer.scoring; 
import org.apache.spark.api.java.function.DoubleFlatMapFunction; -import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.broadcast.Broadcast; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; @@ -60,7 +59,7 @@ public class ScoreExamplesFunction implements DoubleFlatMapFunction implements PairFlatMapFunction implements PairFlatMapFunction, DataSetIterator iter = new IteratorDataSetIterator(dataSetIterator, minibatchSize); //Does batching where appropriate - MultiLayerNetwork network = new MultiLayerNetwork(MultiLayerConfiguration.fromJson(json)); + MultiLayerNetwork network = new MultiLayerNetwork(NeuralNetConfiguration.fromJson(json)); network.init(); INDArray val = params.value().unsafeDuplication(); //.value() object will be shared by all executors on each machine -> OK, as params are not modified by score function if (val.length() != network.numParams(false)) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java index a0bcca02b..d9901cbe0 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionErrorWithKeyFunction.java @@ -22,21 +22,18 @@ package org.deeplearning4j.spark.impl.multilayer.scoring; import org.apache.spark.broadcast.Broadcast; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.layers.variational.VariationalAutoencoder; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.spark.impl.common.score.BaseVaeScoreWithKeyFunction; import org.nd4j.linalg.api.ndarray.INDArray; -import scala.Tuple2; - -import java.util.Iterator; public class VaeReconstructionErrorWithKeyFunction extends BaseVaeScoreWithKeyFunction { /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param batchSize Batch size to use when scoring */ public VaeReconstructionErrorWithKeyFunction(Broadcast params, Broadcast jsonConfig, @@ -47,7 +44,7 @@ public class VaeReconstructionErrorWithKeyFunction extends BaseVaeScoreWithKe @Override public VariationalAutoencoder getVaeLayer() { MultiLayerNetwork network = - new MultiLayerNetwork(MultiLayerConfiguration.fromJson(jsonConfig.getValue())); + new MultiLayerNetwork(NeuralNetConfiguration.fromJson(jsonConfig.getValue())); network.init(); INDArray val = params.value().unsafeDuplication(); if (val.length() != network.numParams(false)) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java index 
d65084dc5..b7cdbd403 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/multilayer/scoring/VaeReconstructionProbWithKeyFunction.java @@ -22,7 +22,7 @@ package org.deeplearning4j.spark.impl.multilayer.scoring; import org.apache.spark.broadcast.Broadcast; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.layers.variational.VariationalAutoencoder; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.spark.impl.common.score.BaseVaeReconstructionProbWithKeyFunction; @@ -34,7 +34,7 @@ public class VaeReconstructionProbWithKeyFunction extends BaseVaeReconstructi /** * @param params MultiLayerNetwork parameters - * @param jsonConfig MultiLayerConfiguration, as json + * @param jsonConfig NeuralNetConfiguration, as json * @param useLogProbability If true: use log probability. False: use raw probability. * @param batchSize Batch size to use when scoring * @param numSamples Number of samples to use when calling {@link VariationalAutoencoder#reconstructionLogProbability(INDArray, int)} @@ -47,7 +47,7 @@ public class VaeReconstructionProbWithKeyFunction extends BaseVaeReconstructi @Override public VariationalAutoencoder getVaeLayer() { MultiLayerNetwork network = - new MultiLayerNetwork(MultiLayerConfiguration.fromJson(jsonConfig.getValue())); + new MultiLayerNetwork(NeuralNetConfiguration.fromJson(jsonConfig.getValue())); network.init(); INDArray val = params.value().unsafeDuplication(); if (val.length() != network.numParams(false)) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java index 4a0252b28..1dc1d4f1b 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java @@ -41,7 +41,7 @@ import org.deeplearning4j.core.storage.StatsStorageRouter; import org.deeplearning4j.core.storage.StatsStorageRouterProvider; import org.deeplearning4j.core.storage.StorageMetaData; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.api.TrainingListener; @@ -275,7 +275,7 @@ public class ParameterAveragingTrainingMaster @Override public ParameterAveragingTrainingWorker getWorkerInstance(SparkDl4jMultiLayer network) { - NetBroadcastTuple tuple = new NetBroadcastTuple(network.getNetwork().getLayerWiseConfigurations(), + NetBroadcastTuple tuple = new NetBroadcastTuple(network.getNetwork().getConfiguration(), network.getNetwork().params(), network.getNetwork().getUpdater().getStateViewArray()); if (collectTrainingStats) @@ -727,7 +727,7 @@ public class ParameterAveragingTrainingMaster if (params != null) { 
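For the ParameterAveragingTrainingMaster changes here, the functional pattern is unchanged; only the accessor is renamed. The per-network configuration now comes from getConfiguration() rather than getLayerWiseConfigurations(), and the iteration counter is still kept on that configuration object, as the next hunk shows. A small sketch of the bookkeeping step, using only the getIterationCount()/setIterationCount() calls that appear in the hunk:

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;

    public class IterationCountUtil {
        /** Advance the stored iteration count after one averaging round of 'averagingFrequency' updates. */
        public static void bumpIterationCount(MultiLayerNetwork network, int averagingFrequency) {
            NeuralNetConfiguration conf = network.getConfiguration(); // replaces getLayerWiseConfigurations()
            conf.setIterationCount(conf.getIterationCount() + averagingFrequency);
        }
    }
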
//Params may be null for edge case (empty RDD) if (network != null) { - MultiLayerConfiguration conf = network.getNetwork().getLayerWiseConfigurations(); + NeuralNetConfiguration conf = network.getNetwork().getConfiguration(); int numUpdates = averagingFrequency; conf.setIterationCount(conf.getIterationCount() + numUpdates); } else { diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java index 87374a584..4820e938f 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingWorker.java @@ -21,13 +21,13 @@ package org.deeplearning4j.spark.impl.paramavg; import lombok.val; +import net.brutex.ai.dnn.api.IModel; import org.apache.spark.broadcast.Broadcast; import org.deeplearning4j.core.storage.Persistable; import org.deeplearning4j.core.storage.StatsStorageRouter; import org.deeplearning4j.core.storage.StatsStorageRouterProvider; import org.deeplearning4j.core.storage.StorageMetaData; import org.deeplearning4j.core.storage.listener.RoutingIterationListener; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.util.ComputationGraphUtil; @@ -159,7 +159,7 @@ public class ParameterAveragingTrainingWorker extends BaseTrainingWorker list = new ArrayList<>(trainingListeners.size()); for (TrainingListener l : trainingListeners) { diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java index 5a8ac5d7e..e8412455a 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java @@ -20,13 +20,10 @@ package org.deeplearning4j.spark; -import org.apache.hadoop.conf.Configuration; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.datavec.spark.util.SerializableHadoopConfig; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.spark.impl.multilayer.SparkDl4jMultiLayer; import org.deeplearning4j.spark.impl.paramavg.ParameterAveragingTrainingMaster; @@ -124,8 +121,8 @@ public abstract class BaseSparkTest extends BaseDL4JTest implements Serializable return 4; } - protected MultiLayerConfiguration getBasicConf() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123) + protected NeuralNetConfiguration getBasicConf() { + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .updater(new Nesterovs(0.1, 0.9)).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java 
b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java index f4e9f674e..09d147283 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java @@ -35,7 +35,6 @@ import org.deeplearning4j.earlystopping.termination.MaxTimeIterationTerminationC import org.deeplearning4j.earlystopping.termination.ScoreImprovementEpochTerminationCondition; import org.deeplearning4j.earlystopping.trainer.IEarlyStoppingTrainer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -68,7 +67,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { //Spark tests don't run on windows return; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) @@ -123,7 +122,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(10.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).list() @@ -163,7 +162,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) @@ -209,7 +208,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) @@ -246,7 +245,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { //Spark tests don't run on windows return; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java index 39618055e..f0e1fefb1 100644 --- 
a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java @@ -71,7 +71,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { //Spark tests don't run on windows return; } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) @@ -124,7 +124,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(2.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") @@ -165,7 +165,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") @@ -213,7 +213,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { return; } Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") @@ -253,7 +253,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { //Spark tests don't run on windows return; } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestKryo.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestKryo.java index da5d7822a..7815303f0 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestKryo.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestKryo.java @@ -22,7 +22,6 @@ package org.deeplearning4j.spark; import org.apache.spark.serializer.SerializerInstance; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.conf.graph.*; @@ -68,16 +67,16 @@ public class TestKryo extends 
BaseSparkKryoTest { Map m = new HashMap<>(); m.put(0, 0.5); m.put(10, 0.1); - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder() - .updater(new Nadam(new MapSchedule(ScheduleType.ITERATION,m))).list().layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build()) + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() + .updater(new Nadam(new MapSchedule(ScheduleType.ITERATION,m))).layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build()) .build(); testSerialization(mlc, si); - ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration cgc = ((NeuralNetConfiguration.NeuralNetConfigurationBuilder)NeuralNetConfiguration.builder() .dist(new UniformDistribution(-1, 1)) - .updater(new Adam(new MapSchedule(ScheduleType.ITERATION,m))) + .updater(new Adam(new MapSchedule(ScheduleType.ITERATION,m)))) .graphBuilder() .addInputs("in").addLayer("out", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in") .setOutputs("out").build(); @@ -86,7 +85,7 @@ public class TestKryo extends BaseSparkKryoTest { //Check main layers: - Layer[] layers = new Layer[] {new OutputLayer.Builder().nIn(10).nOut(10).build(), + LayerConfiguration[] layers = new LayerConfiguration[] {new OutputLayer.Builder().nIn(10).nOut(10).build(), new RnnOutputLayer.Builder().nIn(10).nOut(10).build(), new LossLayer.Builder().build(), new CenterLossOutputLayer.Builder().nIn(10).nOut(10).build(), new DenseLayer.Builder().nIn(10).nOut(10).build(), @@ -97,7 +96,7 @@ public class TestKryo extends BaseSparkKryoTest { new LSTM.Builder().nIn(10).nOut(10).build(), new DropoutLayer.Builder(0.5).build(), new BatchNormalization.Builder().build(), new LocalResponseNormalization.Builder().build()}; - for (Layer l : layers) { + for (LayerConfiguration l : layers) { testSerialization(l, si); } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java index 714c3ffb6..cc32d9723 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java @@ -30,7 +30,6 @@ import org.datavec.api.records.reader.impl.csv.CSVRecordReader; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.spark.BaseSparkTest; @@ -84,7 +83,7 @@ public class TestPreProcessedData extends BaseSparkTest { iter.next().save(f2); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) .activation(Activation.TANH).build()) @@ -134,7 +133,7 @@ public class TestPreProcessedData extends BaseSparkTest { iter.next().save(f2); } - ComputationGraphConfiguration conf = new 
NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) @@ -188,7 +187,7 @@ public class TestPreProcessedData extends BaseSparkTest { mds.save(f2); } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java index ec2195081..402ecb46a 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java @@ -23,7 +23,6 @@ package org.deeplearning4j.spark.impl; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.spark.api.TrainingMaster; @@ -40,7 +39,7 @@ public class TestKryoWarning { try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build()) .build(); @@ -57,7 +56,7 @@ public class TestKryoWarning { try { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("0", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("0") .build(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java index b3c96333d..d8b0ddb0a 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java @@ -22,7 +22,6 @@ package org.deeplearning4j.spark.impl.customlayer; import com.sun.jna.Platform; import org.apache.spark.api.java.JavaRDD; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -51,8 +50,8 @@ public class TestCustomLayer extends BaseSparkTest { } //Basic test - checks whether exceptions etc are thrown with custom layers + spark 
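The test configurations in this stretch of the patch all move from new NeuralNetConfiguration.Builder() to the NeuralNetConfiguration.builder() factory, and the built object is used directly where a MultiLayerConfiguration used to be. A representative sketch of the new style; the layer sizes and updater below are illustrative only, not taken from any one test:

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.learning.config.Sgd;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    public class BuilderStyleExample {
        public static MultiLayerNetwork build() {
            NeuralNetConfiguration conf = NeuralNetConfiguration.builder() // was: new NeuralNetConfiguration.Builder()
                    .updater(new Sgd(0.1))
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                            .nIn(10).nOut(10).activation(Activation.SOFTMAX).build())
                    .build(); // result is a NeuralNetConfiguration, used where MultiLayerConfiguration was before
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            return net;
        }
    }
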
//Custom layers are tested more extensively in dl4j core - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().updater(new Sgd(0.1)).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new CustomLayer(3.14159)).layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/layer/CustomLayer.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/layer/CustomLayer.java index 189e1f529..a9a8e1293 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/layer/CustomLayer.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/layer/CustomLayer.java @@ -57,9 +57,9 @@ public class CustomLayer extends FeedForwardLayer { ret.setListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(conf, layerParamsView, initializeParams); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setConf(conf); + ret.setLayerConfiguration(conf); return ret; } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java index 579effe1a..109add55d 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java @@ -77,7 +77,7 @@ public class TestSparkComputationGraph extends BaseSparkTest { public static ComputationGraph getBasicNetIris2Class() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") .addLayer("l0", new DenseLayer.Builder().nIn(4).nOut(10).build(), "in") .addLayer("l1", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) @@ -104,7 +104,7 @@ public class TestSparkComputationGraph extends BaseSparkTest { while (iter.hasNext()) list.add(iter.next()); - ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .updater(new Sgd(0.1)) .graphBuilder().addInputs("in") .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", @@ -138,7 +138,7 @@ public class TestSparkComputationGraph extends BaseSparkTest { @Test public void testDistributedScoring() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.1) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.1) .seed(123).updater(new Nesterovs(0.1, 0.9)).graphBuilder() .addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) @@ -217,7 +217,7 @@ public class TestSparkComputationGraph extends BaseSparkTest { //@Ignore("AB 2019/05/23 - Failing on CI only - 
passing locally. Possible precision or threading issue") public void testSeedRepeatability() throws Exception { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.RMSPROP) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(4) @@ -414,7 +414,7 @@ public class TestSparkComputationGraph extends BaseSparkTest { JavaRDD rdd = sc.parallelize(l); // simple model - val modelConf = new NeuralNetConfiguration.Builder() + val modelConf = NeuralNetConfiguration.builder() .updater(new Adam(0.01)) .weightInit(WeightInit.XAVIER_UNIFORM) .biasInit(0) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java index c899fae04..2e01cc17d 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java @@ -23,6 +23,8 @@ package org.deeplearning4j.spark.impl.misc; import org.apache.spark.api.java.JavaRDD; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.DenseLayer.Builder; +import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.FrozenLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -45,6 +47,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; import static org.junit.jupiter.api.Assertions.*; @@ -53,7 +56,7 @@ public class TestFrozenLayers extends BaseSparkTest { @Test public void testSparkFrozenLayers() { - NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.TANH); FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); @@ -61,12 +64,12 @@ public class TestFrozenLayers extends BaseSparkTest { int nIn = 6; int nOut = 3; - MultiLayerNetwork origModel = new MultiLayerNetwork(overallConf.clone().list() - .layer(0, new DenseLayer.Builder().nIn(6).nOut(5).build()) - .layer(1, new DenseLayer.Builder().nIn(5).nOut(4).build()) - .layer(2, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) + MultiLayerNetwork origModel = new MultiLayerNetwork((NeuralNetConfiguration) overallConf.clone() + .layer(0, new Builder().nIn(6).nOut(5).build()) + .layer(1, new Builder().nIn(5).nOut(4).build()) + .layer(2, new Builder().nIn(4).nOut(3).build()) + .layer(3, new OutputLayer.Builder( + LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build()); origModel.init(); @@ 
-74,7 +77,7 @@ public class TestFrozenLayers extends BaseSparkTest { MultiLayerNetwork withFrozen = new TransferLearning.Builder(origModel).fineTuneConfiguration(finetune) .setFeatureExtractor(1).build(); - Map m = withFrozen.paramTable(); + Map m = withFrozen.getParamTable(); Map pCopy = new HashMap<>(); for (Map.Entry entry : m.entrySet()) { pCopy.put(entry.getKey(), entry.getValue().dup()); @@ -110,7 +113,7 @@ public class TestFrozenLayers extends BaseSparkTest { MultiLayerNetwork fitted = sNet.getNetwork(); - Map fittedParams = fitted.paramTable(); + Map fittedParams = fitted.getParamTable(); for (Map.Entry entry : fittedParams.entrySet()) { INDArray orig = pCopy.get(entry.getKey()); @@ -136,7 +139,7 @@ public class TestFrozenLayers extends BaseSparkTest { int nIn = 6; int nOut = 3; - ComputationGraph origModel = new ComputationGraph(new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)) + ComputationGraph origModel = new ComputationGraph(NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.TANH).graphBuilder().addInputs("in") .addLayer("0", new DenseLayer.Builder().nIn(6).nOut(5).build(), "in") .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(4).build(), "0") diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java index 8b5a8b46c..6b22acca7 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java @@ -23,7 +23,6 @@ package org.deeplearning4j.spark.impl.multilayer; import org.apache.spark.api.java.JavaPairRDD; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.variational.GaussianReconstructionDistribution; @@ -57,7 +56,7 @@ public class TestMiscFunctions extends BaseSparkTest { @Test public void testFeedForwardWithKey() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3) .activation(Activation.SOFTMAX).build()) @@ -107,7 +106,7 @@ public class TestMiscFunctions extends BaseSparkTest { @Test public void testFeedForwardWithKeyInputMask() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .list() .layer( new LSTM.Builder().nIn(4).nOut(3).build()) .layer(new GlobalPoolingLayer(PoolingType.AVG)) @@ -162,7 +161,7 @@ public class TestMiscFunctions extends BaseSparkTest { @Test public void testFeedForwardWithKeyGraph() { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) 
.graphBuilder().addInputs("in1", "in2") .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in1") .addLayer("1", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in2").addLayer("2", @@ -220,7 +219,7 @@ public class TestMiscFunctions extends BaseSparkTest { int nIn = 10; - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list() .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() .reconstructionDistribution( new GaussianReconstructionDistribution(Activation.IDENTITY)) @@ -259,7 +258,7 @@ public class TestMiscFunctions extends BaseSparkTest { int nIn = 10; - MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() .list().layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() .reconstructionDistribution(new LossFunctionWrapper( diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java index d2c0d66bc..7de0dc285 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java @@ -25,7 +25,6 @@ import lombok.extern.slf4j.Slf4j; import org.apache.spark.api.java.JavaRDD; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -41,7 +40,6 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Adam; -import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.ArrayList; @@ -96,7 +94,7 @@ public class TestSparkDl4jMultiLayer extends BaseSparkTest { //---------------------------------- //Create network configuration and conduct network training - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.FLOAT) .seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java index 050e6279c..277c4a133 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java @@ -26,7 +26,6 @@ import org.apache.spark.api.java.JavaRDD; import 
org.apache.spark.api.java.JavaSparkContext; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -51,7 +50,6 @@ import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import static org.junit.jupiter.api.Assertions.*; @@ -63,9 +61,9 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { } - private static MultiLayerConfiguration getConf(int seed, IUpdater updater) { + private static NeuralNetConfiguration getConf(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder() @@ -74,9 +72,9 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { return conf; } - private static MultiLayerConfiguration getConfCNN(int seed, IUpdater updater) { + private static NeuralNetConfiguration getConfCNN(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).list() .layer(0, new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0) @@ -85,13 +83,13 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { .activation(Activation.TANH).build()) .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10) .build()) - .setInputType(InputType.convolutional(10, 10, 3)).build(); + .inputType(InputType.convolutional(10, 10, 3)).build(); return conf; } private static ComputationGraphConfiguration getGraphConf(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).graphBuilder() .addInputs("in") @@ -105,7 +103,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { private static ComputationGraphConfiguration getGraphConfCNN(int seed, IUpdater updater) { Nd4j.getRandom().setSeed(seed); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).graphBuilder() .addInputs("in") diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java index 
c2c24a617..8376638f3 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java @@ -37,7 +37,6 @@ import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.BatchNormalization; @@ -121,7 +120,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { .toJavaRDD().map(new TestFn()); DataSet d = new IrisDataSetIterator(150, 150).next(); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) @@ -156,8 +155,8 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { .getAbsolutePath()) .toJavaRDD().map(new TestFn()); - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().seed(123) .updater(new Adam(1e-6)) .weightInit(WeightInit.XAVIER) .list() @@ -211,14 +210,14 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { MultiLayerNetwork netCopy = sparkNet.getNetwork().clone(); netCopy.fit(data); - IUpdater expectedUpdater = ((BaseLayer) netCopy.conf().getLayer()).getIUpdater(); - double expectedLR = ((Nesterovs)((BaseLayer) netCopy.conf().getLayer()).getIUpdater()).getLearningRate(); - double expectedMomentum = ((Nesterovs)((BaseLayer) netCopy.conf().getLayer()).getIUpdater()).getMomentum(); + IUpdater expectedUpdater = ((BaseLayer) netCopy.getLayerConfiguration()).getIUpdater(); + double expectedLR = ((Nesterovs)((BaseLayer) netCopy.getLayerConfiguration()).getIUpdater()).getLearningRate(); + double expectedMomentum = ((Nesterovs)((BaseLayer) netCopy.getLayerConfiguration()).getIUpdater()).getMomentum(); - IUpdater actualUpdater = ((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater(); + IUpdater actualUpdater = ((BaseLayer) sparkNet.getNetwork().getLayerConfiguration()).getIUpdater(); sparkNet.fit(sparkData); - double actualLR = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater()).getLearningRate(); - double actualMomentum = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater()).getMomentum(); + double actualLR = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().getLayerConfiguration()).getIUpdater()).getLearningRate(); + double actualMomentum = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().getLayerConfiguration()).getIUpdater()).getMomentum(); assertEquals(expectedUpdater, actualUpdater); assertEquals(expectedLR, actualLR, 0.01); @@ -269,7 +268,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { //Spark tests don't run on windows return; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + 
NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) @@ -294,7 +293,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { @Test public void testDistributedScoring() { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l1(0.1).l2(0.1) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.1) .seed(123).updater(new Nesterovs(0.1, 0.9)).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) @@ -383,7 +382,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { list.add(iter.next()); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) @@ -447,7 +446,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) @@ -517,7 +516,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) @@ -605,7 +604,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) @@ -678,7 +677,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { //Spark tests don't run on windows return; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(4) @@ -763,7 +762,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { list.add(iter.next()); } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new 
RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) @@ -785,13 +784,13 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { JavaRDD rdd = sc.parallelize(list); - assertEquals(0, sparkNet.getNetwork().getLayerWiseConfigurations().getIterationCount()); + assertEquals(0, sparkNet.getNetwork().getConfiguration().getIterationCount()); sparkNet.fit(rdd); assertEquals(minibatchesPerWorkerPerEpoch, - sparkNet.getNetwork().getLayerWiseConfigurations().getIterationCount()); + sparkNet.getNetwork().getConfiguration().getIterationCount()); sparkNet.fit(rdd); assertEquals(2 * minibatchesPerWorkerPerEpoch, - sparkNet.getNetwork().getLayerWiseConfigurations().getIterationCount()); + sparkNet.getNetwork().getConfiguration().getIterationCount()); sparkNet.getTrainingMaster().deleteTempFiles(sc); } @@ -813,7 +812,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { list.add(iter.next()); } - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp()) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) @@ -854,7 +853,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { int nIn = 8; Nd4j.getRandom().setSeed(12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp()) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .weightInit(WeightInit.XAVIER).list() .layer(0, new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12) .decoderLayerSizes(13).reconstructionDistribution( @@ -890,7 +889,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { int nIn = 8; Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new RmsProp()) + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .addLayer("0", new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12) .decoderLayerSizes(13).reconstructionDistribution( @@ -930,8 +929,8 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { int nOut = 2; int layerSize = 10; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).build()) .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX).lossFunction( @@ -985,8 +984,8 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { int nOut = 3; int layerSize = 10; - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).build()) .layer(1, new 
OutputLayer.Builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX).lossFunction( @@ -1039,12 +1038,12 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { //Spark tests don't run on windows return; } - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(new OutputLayer.Builder().nIn(4).nOut(3).build()) .build(); - ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder() + ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).build(), "in") @@ -1075,11 +1074,11 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { for(int i=0; i<3; i++ ){ - assertEquals(i, sn1.getNetwork().getLayerWiseConfigurations().getEpochCount()); + assertEquals(i, sn1.getNetwork().getConfiguration().getEpochCount()); assertEquals(i, sn2.getNetwork().getComputationGraphConfiguration().getEpochCount()); sn1.fit(rdd); sn2.fit(rdd); - assertEquals(i+1, sn1.getNetwork().getLayerWiseConfigurations().getEpochCount()); + assertEquals(i+1, sn1.getNetwork().getConfiguration().getEpochCount()); assertEquals(i+1, sn2.getNetwork().getComputationGraphConfiguration().getEpochCount()); } } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java index 5d33e82c6..5b735e5a2 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java @@ -22,11 +22,9 @@ package org.deeplearning4j.spark.impl.stats; import com.sun.jna.Platform; import org.apache.commons.io.FilenameUtils; -import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -68,7 +66,7 @@ public class TestTrainingStatsCollection extends BaseSparkTest { try { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).build()) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java index aadf69cdd..1104f8667 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java @@ -27,7 +27,6 @@ import org.deeplearning4j.core.storage.Persistable; import org.deeplearning4j.core.storage.StatsStorage; import 
org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -60,7 +59,7 @@ public class TestListeners extends BaseSparkTest { JavaSparkContext sc = getContext(); int nExecutors = numExecutors(); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(123) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/ParameterServerTrainingHook.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/ParameterServerTrainingHook.java index 402560c73..060b88dc1 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/ParameterServerTrainingHook.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/ParameterServerTrainingHook.java @@ -20,7 +20,7 @@ package org.deeplearning4j.spark.parameterserver; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.spark.api.TrainingHook; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; @@ -39,7 +39,7 @@ public class ParameterServerTrainingHook implements TrainingHook { * @param model themodel that was update */ @Override - public void preUpdate(DataSet minibatch, Model model) { + public void preUpdate(DataSet minibatch, IModel model) { //pull } @@ -51,7 +51,7 @@ public class ParameterServerTrainingHook implements TrainingHook { * @param model the model that was updated */ @Override - public void postUpdate(DataSet minibatch, Model model) { + public void postUpdate(DataSet minibatch, IModel model) { //push } @@ -63,7 +63,7 @@ public class ParameterServerTrainingHook implements TrainingHook { * @param model themodel that was update */ @Override - public void preUpdate(MultiDataSet minibatch, Model model) { + public void preUpdate(MultiDataSet minibatch, IModel model) { //pull } @@ -75,7 +75,7 @@ public class ParameterServerTrainingHook implements TrainingHook { * @param model the model that was updated */ @Override - public void postUpdate(MultiDataSet minibatch, Model model) { + public void postUpdate(MultiDataSet minibatch, IModel model) { //push } } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java index 7e521f0c1..0265837bd 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/pw/SharedTrainingWrapper.java @@ -27,7 +27,7 @@ import 
org.deeplearning4j.core.storage.StatsStorageRouter; import org.deeplearning4j.core.storage.listener.RoutingIterationListener; import org.deeplearning4j.common.config.DL4JEnvironmentVars; import org.deeplearning4j.exception.DL4JInvalidConfigException; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -89,7 +89,7 @@ public class SharedTrainingWrapper { protected ThreadLocal iteratorDataSetCount = new ThreadLocal<>(); //Using AtomicInteger because it's mutable, not because it's atomic protected ThreadLocal observer = new ThreadLocal<>(); protected EncodedGradientsAccumulator accumulator; - protected Model originalModel; + protected IModel originalModel; protected UpdatesConsumer consumer; @@ -200,7 +200,7 @@ public class SharedTrainingWrapper { SharedTrainingConfiguration trainingConfiguration = worker.getBroadcastConfiguration().getValue(); VoidConfiguration voidConfiguration = worker.getBroadcastConfiguration().getValue().getVoidConfiguration(); - Model model = null; + IModel model = null; /* Plan is simple here: if there's defined field in SharedTrainingConfiguration - use that. @@ -425,7 +425,7 @@ public class SharedTrainingWrapper { .setTrainingWorkspaceMode(trainingConfiguration.getWorkspaceMode()); ((ComputationGraph) originalModel).setGradientsAccumulator(accumulator); } else if (model instanceof MultiLayerNetwork) { - ((MultiLayerNetwork) originalModel).getLayerWiseConfigurations() + ((MultiLayerNetwork) originalModel).getConfiguration() .setTrainingWorkspaceMode(trainingConfiguration.getWorkspaceMode()); ((MultiLayerNetwork) originalModel).setGradientsAccumulator(accumulator); } diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java index 1a11d70a5..ef252470b 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java @@ -262,7 +262,7 @@ public class SharedTrainingMaster extends BaseTrainingMaster iterations = Collections.newSetFromMap(new ConcurrentHashMap<>()); private static final Set epochs = Collections.newSetFromMap(new ConcurrentHashMap<>()); @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { iterations.add(iteration); epochs.add(epoch); } diff --git a/cavis-dnn/cavis-dnn-tsne/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java b/cavis-dnn/cavis-dnn-tsne/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java index d8efd7dbb..920f58979 100644 --- a/cavis-dnn/cavis-dnn-tsne/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java +++ b/cavis-dnn/cavis-dnn-tsne/src/main/java/org/deeplearning4j/plot/BarnesHutTsne.java @@ -22,11 +22,11 @@ import lombok.AllArgsConstructor; import lombok.Data; import lombok.Setter; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.clustering.algorithm.Distance; import org.deeplearning4j.clustering.sptree.DataPoint; 
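The hunks above repeat the two renames that run through this whole patch: org.deeplearning4j.nn.api.Model becomes net.brutex.ai.dnn.api.IModel in every TrainingHook and listener signature, and MultiLayerNetwork.getLayerWiseConfigurations() becomes getConfiguration(). A minimal sketch of a custom listener written against those renamed signatures (the class name is hypothetical; it assumes BaseTrainingListener exposes the iterationDone(IModel, int, int) override shown in this patch):

import net.brutex.ai.dnn.api.IModel;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.optimize.api.BaseTrainingListener;

public class EpochCountListener extends BaseTrainingListener {
    @Override
    public void iterationDone(IModel model, int iteration, int epoch) {
        // IModel replaces Model; getConfiguration() replaces getLayerWiseConfigurations()
        if (model instanceof MultiLayerNetwork) {
            int completed = ((MultiLayerNetwork) model).getConfiguration().getEpochCount();
            System.out.println("iteration " + iteration + ", epochs completed: " + completed);
        }
    }
}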
import org.deeplearning4j.clustering.sptree.SpTree; import org.deeplearning4j.clustering.vptree.VPTree; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.WorkspaceMode; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -64,7 +64,7 @@ import static org.nd4j.linalg.ops.transforms.Transforms.sign; */ @Slf4j @Data -public class BarnesHutTsne implements Model { +public class BarnesHutTsne implements IModel { public final static String workspaceCache = "LOOP_CACHE"; @@ -897,12 +897,12 @@ public class BarnesHutTsne implements Model { } @Override - public NeuralNetConfiguration conf() { + public NeuralNetConfiguration getNetConfiguration() { return null; } @Override - public void setConf(NeuralNetConfiguration conf) { + public void setLayerConfiguration(NeuralNetConfiguration layerConfiguration) { } diff --git a/cavis-ui/cavis-ui-common/src/main/java/org/deeplearning4j/ui/weights/ConvolutionalIterationListener.java b/cavis-ui/cavis-ui-common/src/main/java/org/deeplearning4j/ui/weights/ConvolutionalIterationListener.java index 4770b2d76..44caf37c4 100644 --- a/cavis-ui/cavis-ui-common/src/main/java/org/deeplearning4j/ui/weights/ConvolutionalIterationListener.java +++ b/cavis-ui/cavis-ui-common/src/main/java/org/deeplearning4j/ui/weights/ConvolutionalIterationListener.java @@ -23,12 +23,12 @@ package org.deeplearning4j.ui.weights; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import lombok.val; +import net.brutex.ai.dnn.api.IModel; import org.datavec.image.loader.ImageLoader; import org.deeplearning4j.core.storage.Persistable; import org.deeplearning4j.core.storage.StatsStorage; import org.deeplearning4j.core.storage.StatsStorageRouter; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.optimize.api.BaseTrainingListener; @@ -40,8 +40,6 @@ import org.deeplearning4j.ui.model.weights.ConvolutionListenerPersistable; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.exception.ND4JArraySizeException; import org.nd4j.common.io.ClassPathResource; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import javax.imageio.ImageIO; import java.awt.*; @@ -60,7 +58,7 @@ public class ConvolutionalIterationListener extends BaseTrainingListener { } private int freq = 10; - private static final Logger log = LoggerFactory.getLogger(ConvolutionalIterationListener.class); + private int minibatchNum = 0; private boolean openBrowser = true; private final String path; @@ -125,12 +123,12 @@ public class ConvolutionalIterationListener extends BaseTrainingListener { * @param iteration the iteration number */ @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { } @Override - public void onForwardPass(Model model, Map activations) { + public void onForwardPass(IModel model, Map activations) { int iteration = (model instanceof MultiLayerNetwork ? ((MultiLayerNetwork)model).getIterationCount() : ((ComputationGraph)model).getIterationCount()); if (iteration % freq == 0) { @@ -147,7 +145,7 @@ public class ConvolutionalIterationListener extends BaseTrainingListener { throw new RuntimeException("layers.length != activations.size(). 
Got layers.length="+layers.length+", activations.size()="+activations.size()); for( int i=0; i activations) { + public void onForwardPass(IModel model, List activations) { int iteration = (model instanceof MultiLayerNetwork ? ((MultiLayerNetwork)model).getIterationCount() : ((ComputationGraph)model).getIterationCount()); if (iteration % freq == 0) { diff --git a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java index 81cd4e5b1..7c5de3bbb 100644 --- a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java +++ b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/ManualTests.java @@ -29,8 +29,8 @@ import org.deeplearning4j.models.embeddings.reader.impl.BasicModelUtils; import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.Word2Vec; import org.deeplearning4j.nn.conf.GradientNormalization; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -125,10 +125,10 @@ public class ManualTests { outputNum, useSubset, true, 1.0, new Random(seed)); log.info("Build model...."); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .activation(Activation.RELU).weightInit(WeightInit.XAVIER) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) - .updater(new AdaGrad(0.01)).weightNoise(new DropConnect(0.5)).list() + .updater(new AdaGrad(0.01)).weightNoise(new DropConnect(0.5)) .layer(0, new ConvolutionLayer.Builder(4, 4).name("cnn1").nIn(nChannels).stride(1, 1).nOut(20) .build()) .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) @@ -144,7 +144,7 @@ public class ManualTests { .layer(8, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(numRows, numColumns, nChannels)); + .inputType(InputType.convolutional(numRows, numColumns, nChannels)); MultiLayerNetwork model = new MultiLayerNetwork(builder.build()); model.init(); @@ -246,10 +246,10 @@ public class ManualTests { DataSetIterator mnistTest = new MnistDataSetIterator(batchSize, false, 12345); log.info("Build model...."); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .l2(0.0005) .weightInit(WeightInit.XAVIER) - .updater(new Nesterovs(0.01, 0.9)).list() + .updater(new Nesterovs(0.01, 0.9)) .layer(0, new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) @@ -263,9 +263,9 @@ public class ManualTests { .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutional(28, 28, nChannels)); + .inputType(InputType.convolutional(28, 28, nChannels)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); /* @@ -320,10 +320,10 @@ public class ManualTests { DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, 12345); log.info("Build model...."); - MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed) + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .l2(0.0005) .weightInit(WeightInit.XAVIER) - .updater(new Nesterovs(0.01, 0.9)).list() + .updater(new Nesterovs(0.01, 0.9)) .layer(0, new FrozenLayer(new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build())) @@ -332,9 +332,9 @@ public class ManualTests { .layer(2, new FrozenLayer(new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build())) .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, nChannels)); + .inputType(InputType.convolutionalFlat(28, 28, nChannels)); - MultiLayerConfiguration conf = builder.build(); + NeuralNetConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); diff --git a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java index 442f3bf01..e545ff53b 100644 --- a/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java +++ b/cavis-ui/cavis-ui-common/src/test/java/org/deeplearning4j/ui/weights/TestConvolutionalListener.java @@ -21,7 +21,6 @@ package org.deeplearning4j.ui.weights; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -54,9 +53,9 @@ public class TestConvolutionalListener { DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, 12345); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) // Training iterations as above + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) // Training iterations as above .l2(0.0005).weightInit(WeightInit.XAVIER) - .updater(new Nesterovs(0.01, 0.9)).list() + .updater(new Nesterovs(0.01, 0.9)) .layer(0, new ConvolutionLayer.Builder(5, 5) //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) @@ -70,7 +69,7 @@ public class TestConvolutionalListener { .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) - .setInputType(InputType.convolutionalFlat(28, 28, 1)) //See note below + .inputType(InputType.convolutionalFlat(28, 28, 1)) //See note below .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-ui/cavis-ui-model/src/main/java/org/deeplearning4j/ui/model/stats/BaseStatsListener.java b/cavis-ui/cavis-ui-model/src/main/java/org/deeplearning4j/ui/model/stats/BaseStatsListener.java index e660a8e04..b9a7e985d 100644 --- a/cavis-ui/cavis-ui-model/src/main/java/org/deeplearning4j/ui/model/stats/BaseStatsListener.java +++ b/cavis-ui/cavis-ui-model/src/main/java/org/deeplearning4j/ui/model/stats/BaseStatsListener.java @@ -21,6 +21,7 @@ package org.deeplearning4j.ui.model.stats; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.io.IOUtils; import org.bytedeco.javacpp.Pointer; import org.deeplearning4j.common.config.DL4JClassLoading; @@ -28,7 +29,6 @@ import org.deeplearning4j.core.storage.StatsStorageRouter; import org.deeplearning4j.core.storage.StorageMetaData; import org.deeplearning4j.core.storage.listener.RoutingIterationListener; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -85,7 +85,7 @@ public abstract class BaseStatsListener implements RoutingIterationListener { private Map meanMagGradients; private static class ModelInfo implements Serializable { - private final Model model; + private final IModel model; private long initTime; private long lastReportTime = -1; private int lastReportIteration = -1; @@ -97,12 +97,12 @@ public abstract class BaseStatsListener implements RoutingIterationListener { private int iterCount = 0; - private ModelInfo(Model model) { + private ModelInfo(IModel model) { this.model = model; } } - private ModelInfo getModelInfo(Model model) { + private ModelInfo getModelInfo(IModel model) { ModelInfo mi = null; for (ModelInfo m : modelInfos) { if (m.model == model) { @@ -218,7 +218,7 @@ public abstract class BaseStatsListener implements RoutingIterationListener { return sessionID; } - private String getSessionID(Model model) { + private String getSessionID(IModel model) { if (model instanceof MultiLayerNetwork || model instanceof ComputationGraph) return sessionID; if (model instanceof Layer) { @@ -231,17 +231,17 @@ public abstract class BaseStatsListener implements RoutingIterationListener { } @Override - public void onEpochStart(Model model) { + public void onEpochStart(IModel model) { } @Override - public void onEpochEnd(Model model) { + public void onEpochEnd(IModel model) { } @Override - public void onForwardPass(Model model, List activations) { + public void onForwardPass(IModel model, List activations) { int iterCount = getModelInfo(model).iterCount; if (calcFromActivations() && (iterCount == 0 || iterCount % updateConfig.reportingFrequency() == 0)) { //Assumption: we have input, layer 0, layer 1, ... 
@@ -257,7 +257,7 @@ public abstract class BaseStatsListener implements RoutingIterationListener { } @Override - public void onForwardPass(Model model, Map activations) { + public void onForwardPass(IModel model, Map activations) { int iterCount = getModelInfo(model).iterCount; if (calcFromActivations() && updateConfig.reportingFrequency() > 0 && (iterCount == 0 || iterCount % updateConfig.reportingFrequency() == 0)) { @@ -277,7 +277,7 @@ public abstract class BaseStatsListener implements RoutingIterationListener { } @Override - public void onGradientCalculation(Model model) { + public void onGradientCalculation(IModel model) { int iterCount = getModelInfo(model).iterCount; if (calcFromGradients() && updateConfig.reportingFrequency() > 0 && (iterCount == 0 || iterCount % updateConfig.reportingFrequency() == 0)) { @@ -311,12 +311,12 @@ public abstract class BaseStatsListener implements RoutingIterationListener { } @Override - public void onBackwardPass(Model model) { + public void onBackwardPass(IModel model) { //No op } @Override - public void iterationDone(Model model, int iteration, int epoch) { + public void iterationDone(IModel model, int iteration, int epoch) { ModelInfo modelInfo = getModelInfo(model); boolean backpropParamsOnly = backpropParamsOnly(model); @@ -426,10 +426,10 @@ public abstract class BaseStatsListener implements RoutingIterationListener { //Need to append "0_", "1_" etc to param names from layers... int layerIdx = 0; for (Layer l : ((MultiLayerNetwork) model).getLayers()) { - NeuralNetConfiguration conf = l.conf(); - List paramkeys = l.conf().getLayer().initializer().paramKeys(l.conf().getLayer()); + NeuralNetConfiguration conf = l.getNetConfiguration(); + List paramkeys = l.getLayerConfiguration().initializer().paramKeys(l.getLayerConfiguration()); for (String s : paramkeys) { - double lr = conf.getLayer().getUpdaterByParam(s).getLearningRate(l.getIterationCount(), l.getEpochCount()); + double lr = conf.getFirstLayer().getUpdaterByParam(s).getLearningRate(l.getIterationCount(), l.getEpochCount()); if (Double.isNaN(lr)) { //Edge case: No-Op updater, AdaDelta etc - don't have a LR hence return NaN for IUpdater.getLearningRate lr = 0.0; @@ -440,11 +440,11 @@ public abstract class BaseStatsListener implements RoutingIterationListener { } } else if (model instanceof ComputationGraph) { for (Layer l : ((ComputationGraph) model).getLayers()) { - NeuralNetConfiguration conf = l.conf(); - String layerName = conf.getLayer().getLayerName(); - List paramkeys = l.conf().getLayer().initializer().paramKeys(l.conf().getLayer()); + NeuralNetConfiguration conf = l.getNetConfiguration(); + String layerName = conf.getFirstLayer().getLayerName(); + List paramkeys = l.getLayerConfiguration().initializer().paramKeys(l.getLayerConfiguration()); for (String s : paramkeys) { - double lr = conf.getLayer().getUpdaterByParam(s).getLearningRate(l.getIterationCount(), l.getEpochCount()); + double lr = conf.getFirstLayer().getUpdaterByParam(s).getLearningRate(l.getIterationCount(), l.getEpochCount()); if (Double.isNaN(lr)) { //Edge case: No-Op updater, AdaDelta etc - don't have a LR hence return NaN for IUpdater.getLearningRate lr = 0.0; @@ -454,9 +454,9 @@ public abstract class BaseStatsListener implements RoutingIterationListener { } } else if (model instanceof Layer) { Layer l = (Layer) model; - List paramkeys = l.conf().getLayer().initializer().paramKeys(l.conf().getLayer()); + List paramkeys = l.getLayerConfiguration().initializer().paramKeys(l.getLayerConfiguration()); for (String s : 
paramkeys) { - double lr = l.conf().getLayer().getUpdaterByParam(s).getLearningRate(l.getIterationCount(), l.getEpochCount()); + double lr = l.getLayerConfiguration().getUpdaterByParam(s).getLearningRate(l.getIterationCount(), l.getEpochCount()); lrs.put(s, lr); } } @@ -575,7 +575,7 @@ public abstract class BaseStatsListener implements RoutingIterationListener { return System.currentTimeMillis(); } - private void doInit(Model model) { + private void doInit(IModel model) { boolean backpropParamsOnly = backpropParamsOnly(model); long initTime = System.currentTimeMillis(); //TODO support NTP StatsInitializationReport initReport = getNewInitializationReport(); @@ -652,7 +652,7 @@ public abstract class BaseStatsListener implements RoutingIterationListener { long numParams; if (model instanceof MultiLayerNetwork) { MultiLayerNetwork net = ((MultiLayerNetwork) model); - jsonConf = net.getLayerWiseConfigurations().toJson(); + jsonConf = net.getConfiguration().toJson(); numLayers = net.getnLayers(); numParams = net.numParams(); } else if (model instanceof ComputationGraph) { @@ -662,7 +662,7 @@ public abstract class BaseStatsListener implements RoutingIterationListener { numParams = cg.numParams(); } else if (model instanceof Layer) { Layer l = (Layer) model; - jsonConf = l.conf().toJson(); + jsonConf = l.getNetConfiguration().toJson(); numLayers = 1; numParams = l.numParams(); } else { @@ -707,7 +707,7 @@ public abstract class BaseStatsListener implements RoutingIterationListener { } } - private void updateExamplesMinibatchesCounts(Model model) { + private void updateExamplesMinibatchesCounts(IModel model) { ModelInfo modelInfo = getModelInfo(model); int examplesThisMinibatch = 0; if (model instanceof MultiLayerNetwork) { @@ -723,7 +723,7 @@ public abstract class BaseStatsListener implements RoutingIterationListener { modelInfo.totalMinibatches++; } - private boolean backpropParamsOnly(Model model) { + private boolean backpropParamsOnly(IModel model) { //For pretrain layers (VAE, AE) we *do* want pretrain params also; for MLN and CG we only want backprop params // as we only have backprop gradients return model instanceof MultiLayerNetwork || model instanceof ComputationGraph; diff --git a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java index 56952d870..9b1a4801e 100644 --- a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java +++ b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java @@ -24,7 +24,6 @@ import org.deeplearning4j.core.storage.Persistable; import org.deeplearning4j.core.storage.StatsStorage; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -51,10 +50,10 @@ public class TestStatsListener extends BaseDL4JTest { DataSet ds = new IrisDataSetIterator(150, 150).next(); - MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .list().layer(0, + .layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) 
.activation(Activation.SOFTMAX).nIn(4).nOut(3).build()) .build(); diff --git a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java index 3cf4ec7d9..d5b1a116b 100644 --- a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java +++ b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java @@ -21,7 +21,6 @@ package org.deeplearning4j.ui.stats; import org.deeplearning4j.BaseDL4JTest; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -43,7 +42,7 @@ public class TestTransferStatsCollection extends BaseDL4JTest { @Test public void test() throws IOException { - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(1, new OutputLayer.Builder().activation(Activation.SOFTMAX).nIn(10).nOut(10).build()).build(); diff --git a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java index 858648018..7e384dec5 100644 --- a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java +++ b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModule.java @@ -38,7 +38,6 @@ import org.deeplearning4j.core.storage.StatsStorageEvent; import org.deeplearning4j.core.storage.StatsStorageListener; import org.deeplearning4j.common.config.DL4JSystemProperties; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.graph.LayerVertex; @@ -872,7 +871,7 @@ public class TrainModule implements UIModule { .end(json); } - private TrainModuleUtils.GraphInfo getGraphInfo(Triple conf) { if (conf == null) { return null; @@ -881,7 +880,7 @@ public class TrainModule implements UIModule { if (conf.getFirst() != null) { return TrainModuleUtils.buildGraphInfo(conf.getFirst()); } else if (conf.getSecond() != null) { - return TrainModuleUtils.buildGraphInfo(conf.getSecond()); + return TrainModuleUtils.buildGraphInfo(conf.getSecond().getDefaultConfiguration()); } else if (conf.getThird() != null) { return TrainModuleUtils.buildGraphInfo(conf.getThird()); } else { @@ -889,7 +888,7 @@ public class TrainModule implements UIModule { } } - private Triple getConfig(String sessionId) { + private Triple getConfig(String sessionId) { boolean noData = (sessionId == null || !knownSessionIDs.containsKey(sessionId)); StatsStorage ss = (noData ? null : knownSessionIDs.get(sessionId)); List allStatic = (noData ? 
Collections.EMPTY_LIST @@ -902,7 +901,7 @@ public class TrainModule implements UIModule { String config = p.getModelConfigJson(); if (modelClass.endsWith("MultiLayerNetwork")) { - MultiLayerConfiguration conf = MultiLayerConfiguration.fromJson(config); + NeuralNetConfiguration conf = NeuralNetConfiguration.fromJson(config); return new Triple<>(conf, null, null); } else if (modelClass.endsWith("ComputationGraph")) { ComputationGraphConfiguration conf = ComputationGraphConfiguration.fromJson(config); @@ -940,7 +939,7 @@ public class TrainModule implements UIModule { Map result = new HashMap<>(); result.put("updateTimestamp", lastUpdateTime); - Triple conf = getConfig(sessionId); + Triple conf = getConfig(sessionId); if (conf == null) { rc.response() .putHeader("content-type", "application/json") @@ -1097,7 +1096,7 @@ public class TrainModule implements UIModule { .end(asJson(ret)); } - private static String getLayerType(Layer layer) { + private static String getLayerType(LayerConfiguration layer) { String layerType = "n/a"; if (layer != null) { try { @@ -1124,14 +1123,14 @@ public class TrainModule implements UIModule { //TODO error handling... String layerType = ""; - Layer layer = null; + LayerConfiguration layer = null; NeuralNetConfiguration nnc = null; if (modelClass.endsWith("MultiLayerNetwork")) { - MultiLayerConfiguration conf = MultiLayerConfiguration.fromJson(configJson); + NeuralNetConfiguration conf = NeuralNetConfiguration.fromJson(configJson); int confIdx = layerIdx - 1; //-1 because of input if (confIdx >= 0) { - nnc = conf.getConf(confIdx); - layer = nnc.getLayer(); + nnc = conf.getNetConfigurations().get(confIdx); + layer = nnc.getFirstLayer(); } else { //Input layer layerType = "Input"; @@ -1144,8 +1143,8 @@ public class TrainModule implements UIModule { Map vertices = conf.getVertices(); if (vertices.containsKey(vertexName) && vertices.get(vertexName) instanceof LayerVertex) { LayerVertex lv = (LayerVertex) vertices.get(vertexName); - nnc = lv.getLayerConf(); - layer = nnc.getLayer(); + nnc = lv.getNetConfiguration(); + layer = nnc.getFirstLayer(); } else if (conf.getNetworkInputs().contains(vertexName)) { layerType = "Input"; } else { @@ -1178,7 +1177,7 @@ public class TrainModule implements UIModule { if (layer instanceof BaseLayer) { BaseLayer bl = (BaseLayer) layer; activationFn = bl.getActivationFn().toString(); - long nParams = layer.initializer().numParams(nnc); + long nParams = layer.initializer().numParams(nnc.getFirstLayer()); layerInfoRows.add(new String[]{i18N.getMessage("train.model.layerinfotable.layerNParams"), String.valueOf(nParams)}); if (nParams > 0) { diff --git a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java index 274e670f6..34b6563f1 100644 --- a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java +++ b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import lombok.AllArgsConstructor; import lombok.Data; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.graph.LayerVertex; @@ -50,7 +49,7 @@ public class TrainModuleUtils { private List 
originalVertexName; } - public static GraphInfo buildGraphInfo(MultiLayerConfiguration config) { + public static GraphInfo buildGraphInfo(NeuralNetConfiguration config) { List vertexNames = new ArrayList<>(); List originalVertexName = new ArrayList<>(); List layerTypes = new ArrayList<>(); @@ -63,17 +62,17 @@ public class TrainModuleUtils { layerInfo.add(Collections.emptyMap()); - List list = config.getConfs(); + List list = config.getNetConfigurations(); int layerIdx = 1; for (NeuralNetConfiguration c : list) { - Layer layer = c.getLayer(); + LayerConfiguration layer = c.getFirstLayer(); String layerName = layer.getLayerName(); if (layerName == null) layerName = "layer" + layerIdx; vertexNames.add(layerName); originalVertexName.add(String.valueOf(layerIdx - 1)); - String layerType = c.getLayer().getClass().getSimpleName().replaceAll("Layer$", ""); + String layerType = c.getFirstLayer().getClass().getSimpleName().replaceAll("Layer$", ""); layerTypes.add(layerType); layerInputs.add(Collections.singletonList(layerIdx - 1)); @@ -87,6 +86,7 @@ public class TrainModuleUtils { return new GraphInfo(vertexNames, layerTypes, layerInputs, layerInfo, originalVertexName); } + /** public static GraphInfo buildGraphInfo(ComputationGraphConfiguration config) { List layerNames = new ArrayList<>(); List layerTypes = new ArrayList<>(); @@ -129,7 +129,7 @@ public class TrainModuleUtils { if (gv instanceof LayerVertex) { NeuralNetConfiguration c = ((LayerVertex) gv).getLayerConf(); - Layer layer = c.getLayer(); + LayerConfiguration layer = c.getFirstLayer(); String layerType = layer.getClass().getSimpleName().replaceAll("Layer$", ""); layerTypes.add(layerType); @@ -148,7 +148,9 @@ public class TrainModuleUtils { return new GraphInfo(layerNames, layerTypes, layerInputs, layerInfo, originalVertexName); } + **/ + /** public static GraphInfo buildGraphInfo(NeuralNetConfiguration config) { List vertexNames = new ArrayList<>(); @@ -162,9 +164,9 @@ public class TrainModuleUtils { layerInputs.add(Collections.emptyList()); layerInfo.add(Collections.emptyMap()); - if (config.getLayer() instanceof VariationalAutoencoder) { + if (config.getFirstLayer() instanceof VariationalAutoencoder) { //Special case like this is a bit ugly - but it works - VariationalAutoencoder va = (VariationalAutoencoder) config.getLayer(); + VariationalAutoencoder va = (VariationalAutoencoder) config.getFirstLayer(); int[] encLayerSizes = va.getEncoderLayerSizes(); int[] decLayerSizes = va.getDecoderLayerSizes(); @@ -240,14 +242,14 @@ public class TrainModuleUtils { } else { //VAE or similar... 
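The TrainModule/TrainModuleUtils hunks also rename the per-layer accessors: getConfs() becomes getNetConfigurations(), getLayer() becomes getFirstLayer(), and the per-layer type is now LayerConfiguration rather than Layer. A short sketch of walking a network's layer configurations with the renamed getters (illustrative only; imports as in the surrounding files, and it assumes the accessors behave as they are used in this patch):

NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
        .layer(0, new DenseLayer.Builder().nIn(4).nOut(8).build())
        .layer(1, new OutputLayer.Builder().activation(Activation.SOFTMAX).nIn(8).nOut(3).build())
        .build();

int idx = 0;
for (NeuralNetConfiguration c : conf.getNetConfigurations()) {   // was: conf.getConfs()
    LayerConfiguration layer = c.getFirstLayer();                 // was: c.getLayer()
    String name = layer.getLayerName() != null ? layer.getLayerName() : "layer" + idx;
    System.out.println(name + ": " + layer.getClass().getSimpleName());
    idx++;
}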
- Layer layer = config.getLayer(); + LayerConfiguration layer = config.getFirstLayer(); String layerName = layer.getLayerName(); if (layerName == null) layerName = "layer0"; vertexNames.add(layerName); originalVertexName.add("0"); - String layerType = config.getLayer().getClass().getSimpleName().replaceAll("Layer$", ""); + String layerType = config.getFirstLayer().getClass().getSimpleName().replaceAll("Layer$", ""); layerTypes.add(layerType); layerInputs.add(Collections.singletonList(0)); @@ -256,20 +258,18 @@ public class TrainModuleUtils { Map map = getLayerInfo(config, layer); layerInfo.add(map); } - - return new GraphInfo(vertexNames, layerTypes, layerInputs, layerInfo, originalVertexName); } +**/ - - private static Map getLayerInfo(NeuralNetConfiguration c, Layer layer) { + private static Map getLayerInfo(NeuralNetConfiguration c, LayerConfiguration layer) { Map map = new LinkedHashMap<>(); if (layer instanceof FeedForwardLayer) { FeedForwardLayer layer1 = (FeedForwardLayer) layer; map.put("Input size", String.valueOf(layer1.getNIn())); map.put("Output size", String.valueOf(layer1.getNOut())); - map.put("Num Parameters", String.valueOf(layer1.initializer().numParams(c))); + map.put("Num Parameters", String.valueOf(layer1.initializer().numParams(layer))); map.put("Activation Function", layer1.getActivationFn().toString()); } diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java index 37a7aab14..8bae39055 100644 --- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java +++ b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java @@ -27,7 +27,6 @@ import org.deeplearning4j.core.storage.impl.CollectionStatsStorageRouter; import org.deeplearning4j.core.storage.impl.RemoteUIStatsStorageRouter; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -133,8 +132,8 @@ public class TestRemoteReceiver extends BaseDL4JTest { public void testRemoteFull() throws Exception { //Use this in conjunction with startRemoteUI() - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(4).build()) .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(4).nOut(3).build()) diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java index 988b9b502..694a557bc 100644 --- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java +++ b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java @@ -31,7 +31,6 @@ import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.exception.DL4JException; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import 
org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -60,7 +59,6 @@ import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.atomic.AtomicReference; import static org.junit.jupiter.api.Assertions.*; @@ -94,10 +92,10 @@ public class TestVertxUI extends BaseDL4JTest { UIServer uiServer = UIServer.getInstance(); uiServer.attach(ss); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-5)) - .list().layer(0, + .layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(10, 11) .decoderLayerSizes(12, 13).weightInit(WeightInit.XAVIER) .pzxActivationFunction(Activation.IDENTITY) @@ -135,8 +133,8 @@ public class TestVertxUI extends BaseDL4JTest { UIServer uiServer = UIServer.getInstance(); uiServer.attach(ss); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(4).build()) .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(4).nOut(3).build()) @@ -163,7 +161,7 @@ public class TestVertxUI extends BaseDL4JTest { UIServer uiServer = UIServer.getInstance(); uiServer.attach(ss); - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("L0", new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(4).build(), "in") .addLayer("L1", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) @@ -185,7 +183,7 @@ public class TestVertxUI extends BaseDL4JTest { @Test public void testAutoAttach() throws Exception { - ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in") + ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("L0", new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(4).build(), "in") .addLayer("L1", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java index d6f11df5e..bc1ae16a8 100644 --- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java +++ b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java @@ -28,7 +28,6 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.core.storage.StatsStorage; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; 
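The UI tests above follow the same configuration migration applied throughout the patch: new NeuralNetConfiguration.Builder()....list()....build(), which produced a MultiLayerConfiguration, becomes NeuralNetConfiguration.builder()....build() returning a NeuralNetConfiguration directly; .list() disappears and setInputType(...) becomes inputType(...). A condensed before/after sketch of that pattern (hyperparameters are placeholders; imports as elsewhere in this patch):

// Old style, removed by this patch:
// MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
//         .updater(new Sgd(0.03)).list()
//         .layer(0, new DenseLayer.Builder().nIn(4).nOut(10).build())
//         .layer(1, new OutputLayer.Builder().activation(Activation.SOFTMAX).nIn(10).nOut(3).build())
//         .setInputType(InputType.feedForward(4))
//         .build();

// New style, as introduced above:
NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
        .updater(new Sgd(0.03))
        .layer(0, new DenseLayer.Builder().nIn(4).nOut(10).build())
        .layer(1, new OutputLayer.Builder().activation(Activation.SOFTMAX).nIn(10).nOut(3).build())
        .inputType(InputType.feedForward(4))
        .build();

MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();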
import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -92,12 +91,12 @@ public class TestVertxUIManual extends BaseDL4JTest { int numInputs = 4; int outputNum = 3; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .activation(Activation.TANH) .weightInit(WeightInit.XAVIER) .updater(new Sgd(0.03)) .l2(1e-4) - .list() + .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(3) .build()) .layer(1, new DenseLayer.Builder().nIn(3).nOut(3) @@ -192,8 +191,8 @@ public class TestVertxUIManual extends BaseDL4JTest { ss = new InMemoryStatsStorage(); String sessionId = Integer.toString(session); statsProvider.put(sessionId, ss); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(layerSize).build()) .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(3).build()) diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java index 5a774dceb..7da17dafd 100644 --- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java +++ b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java @@ -27,7 +27,6 @@ import org.deeplearning4j.core.storage.StatsStorage; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; import org.deeplearning4j.exception.DL4JException; import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; @@ -87,9 +86,9 @@ public class TestVertxUIMultiSession extends BaseDL4JTest { Thread training = new Thread(() -> { int layerSize = sid + 4; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1e-2)) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(layerSize).build()) .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(3).build()) @@ -153,8 +152,8 @@ public class TestVertxUIMultiSession extends BaseDL4JTest { InMemoryStatsStorage ss = new InMemoryStatsStorage(); String sessionId = Integer.toString(session); statsStorageForSession.put(sessionId, ss); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(layerSize).build()) .layer(1, new 
OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(3).build()) diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/InstantiableModel.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/InstantiableModel.java index 6045f7ca1..fa72dea55 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/InstantiableModel.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/InstantiableModel.java @@ -20,20 +20,20 @@ package org.deeplearning4j.zoo; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; public interface InstantiableModel { void setInputShape(int[][] inputShape); - M init(); + M init(); /** * @deprecated No longer used, will be removed in a future release */ @Deprecated ModelMetaData metaData(); - Class modelType(); + Class modelType(); String pretrainedUrl(PretrainedType pretrainedType); diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/ZooModel.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/ZooModel.java index 958edec33..da2bc3a78 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/ZooModel.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/ZooModel.java @@ -21,10 +21,10 @@ package org.deeplearning4j.zoo; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.io.FileUtils; import org.deeplearning4j.common.resources.DL4JResources; import org.deeplearning4j.common.resources.ResourceType; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.util.ModelSerializer; @@ -48,7 +48,7 @@ public abstract class ZooModel implements InstantiableModel { * @return * @throws IOException */ - public Model initPretrained() throws IOException { + public IModel initPretrained() throws IOException { return initPretrained(PretrainedType.IMAGENET); } @@ -59,7 +59,7 @@ public abstract class ZooModel implements InstantiableModel { * @return * @throws IOException */ - public M initPretrained(PretrainedType pretrainedType) throws IOException { + public M initPretrained(PretrainedType pretrainedType) throws IOException { String remoteUrl = pretrainedUrl(pretrainedType); if (remoteUrl == null) throw new UnsupportedOperationException( diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java index b65441942..9e55c5b26 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java @@ -22,13 +22,14 @@ package org.deeplearning4j.zoo.model; import lombok.AllArgsConstructor; import lombok.Builder; -import org.deeplearning4j.nn.api.Model; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.OptimizationAlgorithm; import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.zoo.ModelMetaData; import org.deeplearning4j.zoo.PretrainedType; import 
org.deeplearning4j.zoo.ZooModel; @@ -64,15 +65,16 @@ public class AlexNet extends ZooModel { } @Override - public Class modelType() { + public Class modelType() { return MultiLayerNetwork.class; } - public MultiLayerConfiguration conf() { + public NeuralNetConfiguration conf() { double nonZeroBias = 1; - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed) - .weightInit(new NormalDistribution(0.0, 0.01)) + NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + .seed(seed) + .weightInit( WeightInit.NORMAL) //new NormalDistribution(0.0, 0.01)) .activation(Activation.RELU) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(updater) @@ -84,7 +86,7 @@ public class AlexNet extends ZooModel { .cacheMode(cacheMode) .l2(5 * 1e-4) .miniBatch(false) - .list() + .layer(0, new ConvolutionLayer.Builder(new int[]{11,11}, new int[]{4, 4}) .name("cnn1") .cudnnAlgoMode(ConvolutionLayer.AlgoMode.PREFER_FASTEST) @@ -158,15 +160,16 @@ public class AlexNet extends ZooModel { .build()) - .setInputType(InputType.convolutional(inputShape[2], inputShape[1], inputShape[0])) - .build(); + .inputType( InputType.convolutional(inputShape[2], inputShape[1], inputShape[0]) ) + .build() + ; return conf; } @Override public MultiLayerNetwork init() { - MultiLayerConfiguration conf = conf(); + NeuralNetConfiguration conf = conf(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); return network; diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java index 739493bd8..f2b07ec58 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java @@ -22,9 +22,8 @@ package org.deeplearning4j.zoo.model; import lombok.AllArgsConstructor; import lombok.Builder; -import lombok.NoArgsConstructor; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.common.resources.DL4JResources; -import org.deeplearning4j.nn.api.Model; import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration.GraphBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -80,22 +79,22 @@ public class Darknet19 extends ZooModel { } @Override - public Class modelType() { + public Class modelType() { return ComputationGraph.class; } public ComputationGraphConfiguration conf() { - GraphBuilder graphBuilder = new NeuralNetConfiguration.Builder() + GraphBuilder graphBuilder = ((NeuralNetConfiguration.NeuralNetConfigurationBuilder)NeuralNetConfiguration.builder() .seed(seed) .updater(updater) .weightInit(weightInit) - .l2(0.00001) + .l2(0.00001) .activation(Activation.IDENTITY) .cacheMode(cacheMode) .trainingWorkspaceMode(workspaceMode) .inferenceWorkspaceMode(workspaceMode) - .cudnnAlgoMode(cudnnAlgoMode) - .graphBuilder() + .cudnnAlgoMode(cudnnAlgoMode)) + .graphBuilder() .addInputs("input") .setInputTypes(InputType.convolutional(inputShape[2], inputShape[1], inputShape[0])); diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java index 487401625..07ce6b985 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java +++ 
b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java
@@ -22,8 +22,7 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
-import lombok.NoArgsConstructor;
-import org.deeplearning4j.nn.api.Model;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
 import org.deeplearning4j.nn.conf.graph.L2NormalizeVertex;
@@ -69,13 +68,13 @@ public class FaceNetNN4Small2 extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return ComputationGraph.class;
     }
 
     public ComputationGraphConfiguration conf() {
-        ComputationGraphConfiguration.GraphBuilder graph = new NeuralNetConfiguration.Builder().seed(seed)
+        ComputationGraphConfiguration.GraphBuilder graph = ((NeuralNetConfiguration.NeuralNetConfigurationBuilder)NeuralNetConfiguration.builder().seed(seed)
                         .activation(Activation.IDENTITY)
                         .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                         .updater(updater)
@@ -86,7 +85,7 @@ public class FaceNetNN4Small2 extends ZooModel {
                         .trainingWorkspaceMode(workspaceMode)
                         .inferenceWorkspaceMode(workspaceMode)
                         .cudnnAlgoMode(cudnnAlgoMode)
-                        .convolutionMode(ConvolutionMode.Same)
+                        .convolutionMode(ConvolutionMode.Same))
                         .graphBuilder();
 
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java
index 50f14da0b..2d5d69dda 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java
@@ -22,18 +22,15 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
-import lombok.NoArgsConstructor;
-import org.deeplearning4j.nn.api.Model;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
-import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
 import org.deeplearning4j.nn.conf.distribution.TruncatedNormalDistribution;
 import org.deeplearning4j.nn.conf.graph.L2NormalizeVertex;
 import org.deeplearning4j.nn.conf.graph.MergeVertex;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.*;
 import org.deeplearning4j.nn.graph.ComputationGraph;
-import org.deeplearning4j.nn.weights.WeightInit;
 import org.deeplearning4j.zoo.ModelMetaData;
 import org.deeplearning4j.zoo.PretrainedType;
 import org.deeplearning4j.zoo.ZooModel;
@@ -69,7 +66,7 @@ public class InceptionResNetV1 extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return ComputationGraph.class;
     }
 
@@ -102,7 +99,8 @@ public class InceptionResNetV1 extends ZooModel {
 
     public ComputationGraphConfiguration.GraphBuilder graphBuilder(String input) {
 
-        ComputationGraphConfiguration.GraphBuilder graph = new NeuralNetConfiguration.Builder().seed(seed)
+        ComputationGraphConfiguration.GraphBuilder graph = ((NeuralNetConfiguration.NeuralNetConfigurationBuilder)NeuralNetConfiguration.builder()
+                        .seed(seed)
                         .activation(Activation.RELU)
                         .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                         .updater(updater)
@@ -112,7 +110,7 @@ public class InceptionResNetV1 extends ZooModel {
                         .cacheMode(cacheMode)
                         .trainingWorkspaceMode(workspaceMode)
                         .inferenceWorkspaceMode(workspaceMode)
-                        .convolutionMode(ConvolutionMode.Truncate).graphBuilder();
+                        .convolutionMode(ConvolutionMode.Truncate)).graphBuilder();
 
         graph
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/LeNet.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/LeNet.java
index 64a6f8c92..6dc75af5f 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/LeNet.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/LeNet.java
@@ -22,9 +22,8 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
-import lombok.NoArgsConstructor;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.common.resources.DL4JResources;
-import org.deeplearning4j.nn.api.Model;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
 import org.deeplearning4j.nn.conf.inputs.InputType;
@@ -74,12 +73,12 @@ public class LeNet extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return MultiLayerNetwork.class;
     }
 
-    public MultiLayerConfiguration conf() {
-        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed)
+    public NeuralNetConfiguration conf() {
+        NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(seed)
                         .activation(Activation.IDENTITY)
                         .weightInit(WeightInit.XAVIER)
                         .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
@@ -89,7 +88,7 @@ public class LeNet extends ZooModel {
                         .inferenceWorkspaceMode(workspaceMode)
                         .cudnnAlgoMode(cudnnAlgoMode)
                         .convolutionMode(ConvolutionMode.Same)
-                        .list()
+
                         // block 1
                         .layer(new ConvolutionLayer.Builder()
                                         .name("cnn1")
@@ -128,14 +127,14 @@ public class LeNet extends ZooModel {
                                         .nOut(numClasses)
                                         .activation(Activation.SOFTMAX) // radial basis function required
                                         .build())
-                        .setInputType(InputType.convolutionalFlat(inputShape[2], inputShape[1], inputShape[0]))
+                        .inputType(InputType.convolutionalFlat(inputShape[2], inputShape[1], inputShape[0]))
                         .build();
 
         return conf;
     }
 
     @Override
-    public Model init() {
+    public IModel init() {
         MultiLayerNetwork network = new MultiLayerNetwork(conf());
         network.init();
         return network;
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/NASNet.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/NASNet.java
index 0e78f819e..35f617773 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/NASNet.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/NASNet.java
@@ -22,8 +22,8 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.common.resources.DL4JResources;
-import org.deeplearning4j.nn.api.Model;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
 import org.deeplearning4j.nn.conf.inputs.InputType;
@@ -86,7 +86,7 @@ public class NASNet extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return ComputationGraph.class;
     }
 
@@ -110,7 +110,7 @@ public class NASNet extends ZooModel {
         }
         int filters = (int) Math.floor(penultimateFilters / 24);
 
-        ComputationGraphConfiguration.GraphBuilder graph = new NeuralNetConfiguration.Builder().seed(seed)
+        ComputationGraphConfiguration.GraphBuilder graph = ((NeuralNetConfiguration.NeuralNetConfigurationBuilder)NeuralNetConfiguration.builder().seed(seed)
                 .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                 .updater(updater)
                 .weightInit(weightInit)
@@ -120,7 +120,7 @@ public class NASNet extends ZooModel {
                 .trainingWorkspaceMode(workspaceMode)
                 .inferenceWorkspaceMode(workspaceMode)
                 .cudnnAlgoMode(cudnnAlgoMode)
-                .convolutionMode(ConvolutionMode.Truncate)
+                .convolutionMode(ConvolutionMode.Truncate))
                 .graphBuilder();
 
         if(!skipReduction) {
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java
index 2453bb21c..f530e0781 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java
@@ -22,19 +22,16 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
-import lombok.NoArgsConstructor;
 import org.deeplearning4j.common.resources.DL4JResources;
-import org.deeplearning4j.nn.api.Model;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
-import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
 import org.deeplearning4j.nn.conf.distribution.TruncatedNormalDistribution;
 import org.deeplearning4j.nn.conf.graph.ElementWiseVertex;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.*;
 import org.deeplearning4j.nn.graph.ComputationGraph;
 import org.deeplearning4j.nn.weights.IWeightInit;
-import org.deeplearning4j.nn.weights.WeightInit;
 import org.deeplearning4j.nn.weights.WeightInitDistribution;
 import org.deeplearning4j.zoo.ModelMetaData;
 import org.deeplearning4j.zoo.PretrainedType;
@@ -77,7 +74,7 @@ public class ResNet50 extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return ComputationGraph.class;
     }
 
@@ -175,11 +172,11 @@ public class ResNet50 extends ZooModel {
 
     public ComputationGraphConfiguration.GraphBuilder graphBuilder() {
 
-        ComputationGraphConfiguration.GraphBuilder graph = new NeuralNetConfiguration.Builder().seed(seed)
+        ComputationGraphConfiguration.GraphBuilder graph = ((NeuralNetConfiguration.NeuralNetConfigurationBuilder)NeuralNetConfiguration.builder().seed(seed)
                         .activation(Activation.IDENTITY)
                         .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                         .updater(updater)
-                        .weightInit(weightInit)
+                        .weightInitFn(weightInit)
                        .l1(1e-7)
                        .l2(5e-5)
                        .miniBatch(true)
@@ -187,7 +184,7 @@ public class ResNet50 extends ZooModel {
                        .trainingWorkspaceMode(workspaceMode)
                        .inferenceWorkspaceMode(workspaceMode)
                        .cudnnAlgoMode(cudnnAlgoMode)
-                       .convolutionMode(ConvolutionMode.Truncate)
+                       .convolutionMode(ConvolutionMode.Truncate))
                        .graphBuilder();
 
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java
index 17f22d1f4..f5b1c41ee 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java
@@ -22,8 +22,7 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
-import lombok.NoArgsConstructor;
-import org.deeplearning4j.nn.api.Model;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
 import org.deeplearning4j.nn.conf.inputs.InputType;
@@ -63,13 +62,13 @@ public class SimpleCNN extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return MultiLayerNetwork.class;
     }
 
-    public MultiLayerConfiguration conf() {
-        MultiLayerConfiguration conf =
-                        new NeuralNetConfiguration.Builder().seed(seed)
+    public NeuralNetConfiguration conf() {
+        NeuralNetConfiguration conf =
+                        NeuralNetConfiguration.builder().seed(seed)
                                         .activation(Activation.IDENTITY)
                                         .weightInit(WeightInit.RELU)
                                         .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
@@ -78,7 +77,7 @@ public class SimpleCNN extends ZooModel {
                                         .trainingWorkspaceMode(workspaceMode)
                                         .inferenceWorkspaceMode(workspaceMode)
                                         .convolutionMode(ConvolutionMode.Same)
-                                        .list()
+
                                         // block 1
                                         .layer(0, new ConvolutionLayer.Builder(new int[] {7, 7}).name("image_array")
                                                         .nIn(inputShape[0]).nOut(16).build())
@@ -130,7 +129,7 @@ public class SimpleCNN extends ZooModel {
                                         .layer(31, new GlobalPoolingLayer.Builder(PoolingType.AVG).build())
                                         .layer(32, new ActivationLayer.Builder().activation(Activation.SOFTMAX).build())
 
-                                        .setInputType(InputType.convolutional(inputShape[2], inputShape[1],
+                                        .inputType(InputType.convolutional(inputShape[2], inputShape[1],
                                                         inputShape[0]))
                                         .build();
 
@@ -138,7 +137,7 @@ public class SimpleCNN extends ZooModel {
     }
 
     @Override
-    public Model init() {
+    public IModel init() {
         MultiLayerNetwork network = new MultiLayerNetwork(conf());
         network.init();
         return network;
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SqueezeNet.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SqueezeNet.java
index 2f77a2d4c..e63e36cea 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SqueezeNet.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SqueezeNet.java
@@ -22,12 +22,10 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
-import lombok.NoArgsConstructor;
 import org.deeplearning4j.common.resources.DL4JResources;
-import org.deeplearning4j.nn.api.Model;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
-import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
 import org.deeplearning4j.nn.conf.graph.MergeVertex;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.*;
@@ -79,12 +77,12 @@ public class SqueezeNet extends ZooModel {
     public ComputationGraph initPretrained(PretrainedType pretrainedType) throws IOException {
         ComputationGraph cg = (ComputationGraph) super.initPretrained(pretrainedType);
         //Set collapse dimensions to true in global avg pooling - more useful for users [N,1000] rather than [N,1000,1,1] out. Also matches non-pretrain config
-        ((GlobalPoolingLayer)cg.getLayer("global_average_pooling2d_5").conf().getLayer()).setCollapseDimensions(true);
+        ((GlobalPoolingLayer)cg.getLayer("global_average_pooling2d_5").getLayerConfiguration()).setCollapseDimensions(true);
         return cg;
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return ComputationGraph.class;
     }
 
@@ -103,7 +101,7 @@ public class SqueezeNet extends ZooModel {
 
     public ComputationGraphConfiguration.GraphBuilder graphBuilder() {
 
-        ComputationGraphConfiguration.GraphBuilder graph = new NeuralNetConfiguration.Builder().seed(seed)
+        ComputationGraphConfiguration.GraphBuilder graph = ((NeuralNetConfiguration.NeuralNetConfigurationBuilder)NeuralNetConfiguration.builder().seed(seed)
                         .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                         .updater(updater)
                         .weightInit(weightInit)
@@ -112,7 +110,7 @@ public class SqueezeNet extends ZooModel {
                         .cacheMode(cacheMode)
                         .trainingWorkspaceMode(workspaceMode)
                         .inferenceWorkspaceMode(workspaceMode)
-                        .convolutionMode(ConvolutionMode.Truncate)
+                        .convolutionMode(ConvolutionMode.Truncate))
                         .graphBuilder();
 
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java
index 432c74231..962b8f677 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java
@@ -22,8 +22,7 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
-import lombok.NoArgsConstructor;
-import org.deeplearning4j.nn.api.Model;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
 import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
@@ -66,12 +65,12 @@ public class TextGenerationLSTM extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return MultiLayerNetwork.class;
     }
 
-    public MultiLayerConfiguration conf() {
-        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
+    public NeuralNetConfiguration conf() {
+        NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345)
                         .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                         .l2(0.001)
                         .weightInit(WeightInit.XAVIER)
@@ -80,21 +79,21 @@ public class TextGenerationLSTM extends ZooModel {
                         .trainingWorkspaceMode(workspaceMode)
                         .inferenceWorkspaceMode(workspaceMode)
                         .cudnnAlgoMode(cudnnAlgoMode)
-                        .list()
+
                         .layer(0, new GravesLSTM.Builder().nIn(inputShape[1]).nOut(256).activation(Activation.TANH)
                                         .build())
                         .layer(1, new GravesLSTM.Builder().nOut(256).activation(Activation.TANH).build())
                         .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                         .activation(Activation.SOFTMAX) //MCXENT + softmax for classification
                                         .nOut(totalUniqueCharacters).build())
-                        .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(50).tBPTTBackwardLength(50)
+                        .backpropType(BackpropType.TruncatedBPTT).tbpttFwdLength(50).tbpttBackLength(50)
                         .build();
 
         return conf;
     }
 
     @Override
-    public Model init() {
+    public IModel init() {
         MultiLayerNetwork network = new MultiLayerNetwork(conf());
         network.init();
         return network;
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TinyYOLO.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TinyYOLO.java
index abbdf06cf..e5281d33d 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TinyYOLO.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TinyYOLO.java
@@ -23,9 +23,8 @@ package org.deeplearning4j.zoo.model;
 import lombok.AllArgsConstructor;
 import lombok.Builder;
 import lombok.Getter;
-import lombok.NoArgsConstructor;
 import org.deeplearning4j.common.resources.DL4JResources;
-import org.deeplearning4j.nn.api.Model;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
 import org.deeplearning4j.nn.conf.ComputationGraphConfiguration.GraphBuilder;
@@ -80,14 +79,14 @@ public class TinyYOLO extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return ComputationGraph.class;
     }
 
     public ComputationGraphConfiguration conf() {
         INDArray priors = Nd4j.create(priorBoxes);
 
-        GraphBuilder graphBuilder = new NeuralNetConfiguration.Builder()
+        GraphBuilder graphBuilder = ((NeuralNetConfiguration.NeuralNetConfigurationBuilder)NeuralNetConfiguration.builder()
                 .seed(seed)
                 .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                 .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
@@ -98,7 +97,7 @@ public class TinyYOLO extends ZooModel {
                 .cacheMode(cacheMode)
                 .trainingWorkspaceMode(workspaceMode)
                 .inferenceWorkspaceMode(workspaceMode)
-                .cudnnAlgoMode(cudnnAlgoMode)
+                .cudnnAlgoMode(cudnnAlgoMode))
                 .graphBuilder()
                 .addInputs("input")
                 .setInputTypes(InputType.convolutional(inputShape[2], inputShape[1], inputShape[0]));
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/UNet.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/UNet.java
index ca8136f62..f9400ba8e 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/UNet.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/UNet.java
@@ -22,11 +22,10 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.common.resources.DL4JResources;
-import org.deeplearning4j.nn.api.Model;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
-import org.deeplearning4j.nn.conf.distribution.TruncatedNormalDistribution;
 import org.deeplearning4j.nn.conf.graph.MergeVertex;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.*;
@@ -73,7 +72,7 @@ public class UNet extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return ComputationGraph.class;
     }
 
@@ -92,7 +91,7 @@ public class UNet extends ZooModel {
 
     public ComputationGraphConfiguration.GraphBuilder graphBuilder() {
 
-        ComputationGraphConfiguration.GraphBuilder graph = new NeuralNetConfiguration.Builder().seed(seed)
+        ComputationGraphConfiguration.GraphBuilder graph = ((NeuralNetConfiguration.NeuralNetConfigurationBuilder)NeuralNetConfiguration.builder().seed(seed)
                 .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                 .updater(updater)
                 .weightInit(weightInit)
@@ -100,7 +99,7 @@ public class UNet extends ZooModel {
                 .miniBatch(true)
                 .cacheMode(cacheMode)
                 .trainingWorkspaceMode(workspaceMode)
-                .inferenceWorkspaceMode(workspaceMode)
+                .inferenceWorkspaceMode(workspaceMode))
                 .graphBuilder();
 
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG16.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG16.java
index c52d8988d..2f6aa1cac 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG16.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG16.java
@@ -22,8 +22,8 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.common.resources.DL4JResources;
-import org.deeplearning4j.nn.api.Model;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.CacheMode;
 import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
@@ -83,19 +83,19 @@ public class VGG16 extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return ComputationGraph.class;
     }
 
     public ComputationGraphConfiguration conf() {
         ComputationGraphConfiguration conf =
-                        new NeuralNetConfiguration.Builder().seed(seed)
+                        ((NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder().seed(seed)
                                         .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                                         .updater(updater)
                                         .activation(Activation.RELU)
                                         .cacheMode(cacheMode)
                                         .trainingWorkspaceMode(workspaceMode)
-                                        .inferenceWorkspaceMode(workspaceMode)
+                                        .inferenceWorkspaceMode(workspaceMode))
                                         .graphBuilder()
                                         .addInputs("in")
                                         // block 1
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG19.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG19.java
index ee2bb0725..5e846efda 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG19.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG19.java
@@ -22,9 +22,8 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
-import lombok.NoArgsConstructor;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.common.resources.DL4JResources;
-import org.deeplearning4j.nn.api.Model;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
 import org.deeplearning4j.nn.conf.inputs.InputType;
@@ -33,7 +32,6 @@ import org.deeplearning4j.nn.conf.layers.DenseLayer;
 import org.deeplearning4j.nn.conf.layers.OutputLayer;
 import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
 import org.deeplearning4j.nn.graph.ComputationGraph;
-import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
 import org.deeplearning4j.zoo.ModelMetaData;
 import org.deeplearning4j.zoo.PretrainedType;
 import org.deeplearning4j.zoo.ZooModel;
@@ -74,19 +72,19 @@ public class VGG19 extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return ComputationGraph.class;
     }
 
     public ComputationGraphConfiguration conf() {
         ComputationGraphConfiguration conf =
-                        new NeuralNetConfiguration.Builder().seed(seed)
+                        ((NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder().seed(seed)
                                         .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                                         .updater(updater)
                                         .activation(Activation.RELU)
                                         .cacheMode(cacheMode)
                                         .trainingWorkspaceMode(workspaceMode)
-                                        .inferenceWorkspaceMode(workspaceMode)
+                                        .inferenceWorkspaceMode(workspaceMode))
                                         .graphBuilder()
                                         .addInputs("in")
                                         // block 1
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Xception.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Xception.java
index 4c851fa08..bbba3ff3c 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Xception.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Xception.java
@@ -22,12 +22,10 @@ package org.deeplearning4j.zoo.model;
 
 import lombok.AllArgsConstructor;
 import lombok.Builder;
-import lombok.NoArgsConstructor;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.common.resources.DL4JResources;
-import org.deeplearning4j.nn.api.Model;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
-import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
 import org.deeplearning4j.nn.conf.graph.ElementWiseVertex;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.*;
@@ -39,7 +37,6 @@ import org.deeplearning4j.zoo.ZooModel;
 import org.deeplearning4j.zoo.ZooType;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.learning.config.AdaDelta;
-import org.nd4j.linalg.learning.config.AdaGrad;
 import org.nd4j.linalg.learning.config.IUpdater;
 import org.nd4j.linalg.lossfunctions.LossFunctions;
 
@@ -75,7 +72,7 @@ public class Xception extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return ComputationGraph.class;
    }
 
@@ -94,7 +91,7 @@ public class Xception extends ZooModel {
 
     public ComputationGraphConfiguration.GraphBuilder graphBuilder() {
 
-        ComputationGraphConfiguration.GraphBuilder graph = new NeuralNetConfiguration.Builder().seed(seed)
+        ComputationGraphConfiguration.GraphBuilder graph =((NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder().seed(seed)
                         .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                         .updater(updater)
                         .weightInit(weightInit)
@@ -103,7 +100,7 @@ public class Xception extends ZooModel {
                         .cacheMode(cacheMode)
                         .trainingWorkspaceMode(workspaceMode)
                        .inferenceWorkspaceMode(workspaceMode)
-                        .convolutionMode(ConvolutionMode.Truncate)
+                        .convolutionMode(ConvolutionMode.Truncate))
                        .graphBuilder();
 
diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/YOLO2.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/YOLO2.java
index 030a5c46b..3c28a36a0 100644
--- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/YOLO2.java
+++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/YOLO2.java
@@ -23,9 +23,8 @@ package org.deeplearning4j.zoo.model;
 import lombok.AllArgsConstructor;
 import lombok.Builder;
 import lombok.Getter;
-import lombok.NoArgsConstructor;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.common.resources.DL4JResources;
-import org.deeplearning4j.nn.api.Model;
 import org.deeplearning4j.nn.api.OptimizationAlgorithm;
 import org.deeplearning4j.nn.conf.*;
 import org.deeplearning4j.nn.conf.ComputationGraphConfiguration.GraphBuilder;
@@ -87,14 +86,14 @@ public class YOLO2 extends ZooModel {
     }
 
     @Override
-    public Class<? extends Model> modelType() {
+    public Class<? extends IModel> modelType() {
         return ComputationGraph.class;
     }
 
     public ComputationGraphConfiguration conf() {
         INDArray priors = Nd4j.create(priorBoxes);
 
-        GraphBuilder graphBuilder = new NeuralNetConfiguration.Builder()
+        GraphBuilder graphBuilder = ((NeuralNetConfiguration.NeuralNetConfigurationBuilder)NeuralNetConfiguration.builder()
                 .seed(seed)
                 .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                 .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
@@ -105,7 +104,7 @@ public class YOLO2 extends ZooModel {
                 .cacheMode(cacheMode)
                 .trainingWorkspaceMode(workspaceMode)
                 .inferenceWorkspaceMode(workspaceMode)
-                .cudnnAlgoMode(cudnnAlgoMode)
+                .cudnnAlgoMode(cudnnAlgoMode))
                 .graphBuilder()
                 .addInputs("input")
                 .setInputTypes(InputType.convolutional(inputShape[2], inputShape[1], inputShape[0]));
diff --git a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java
index f9e8b83a1..2bf9e7ed1 100644
--- a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java
+++ b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java
@@ -21,10 +21,10 @@
 package org.deeplearning4j.zoo;
 
 import lombok.extern.slf4j.Slf4j;
+import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.BaseDL4JTest;
 import org.deeplearning4j.datasets.iterator.AsyncDataSetIterator;
 import org.deeplearning4j.datasets.iterator.impl.BenchmarkDataSetIterator;
-import org.deeplearning4j.nn.api.Model;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.LossLayer;
 import org.deeplearning4j.nn.conf.layers.OutputLayer;
@@ -40,7 +40,6 @@ import org.junit.jupiter.api.Test;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
-import org.nd4j.linalg.dataset.api.DataSet;
 import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.lossfunctions.LossFunctions;
@@ -108,7 +107,7 @@ public class TestInstantiation extends BaseDL4JTest {
                         new int[]{8, inputShape[0], inputShape[1], inputShape[2]}, numClasses, 1, gridWidth, gridHeight);
 
-        Model initializedModel = model.init();
+        IModel initializedModel = model.init();
         AsyncDataSetIterator async = new AsyncDataSetIterator(iter);
         if (initializedModel instanceof MultiLayerNetwork) {
             ((MultiLayerNetwork) initializedModel).fit(async);
diff --git a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java
index 44d9dff3c..240cabfcc 100644
--- a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java
+++ b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestUtils.java
@@ -45,7 +45,7 @@ public class TestUtils {
         ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
         MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
 
-        assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
+        assertEquals(net.getConfiguration(), restored.getConfiguration());
         assertEquals(net.params(), restored.params());
 
         return restored;